Example usage for org.apache.hadoop.fs.LocatedFileStatus.getBlockLocations

Introduction

On this page you can find example usages of org.apache.hadoop.fs.LocatedFileStatus.getBlockLocations.

Prototype

public BlockLocation[] getBlockLocations() 

Document

Get the file's block locations. In HDFS, the returned BlockLocation will have different formats for replicated and erasure-coded files.
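
Before the project examples below, here is a minimal, self-contained sketch of the typical pattern: list a directory with FileSystem.listLocatedStatus so that each LocatedFileStatus already carries its block locations, then read the hosts out of each BlockLocation. The path /tmp/data is a placeholder, not taken from any of the examples.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class BlockLocationsExample {
    public static void main(String[] args) throws IOException {
        Path dir = new Path("/tmp/data"); // placeholder path
        FileSystem fs = dir.getFileSystem(new Configuration());
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // The block locations were fetched together with the file
            // status, so no extra getFileBlockLocations() call is needed.
            for (BlockLocation loc : status.getBlockLocations()) {
                System.out.printf("%s offset=%d length=%d hosts=%s%n",
                        status.getPath(), loc.getOffset(), loc.getLength(),
                        String.join(",", loc.getHosts()));
            }
        }
    }
}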

Usage

From source file: com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License: Apache License

/**
 * List file status by calling fileSystem.listLocatedStatus.
 */
private static void listLocatedStatus(String dirPath) {
    Path path = new Path(dirPath);
    boolean exceptionThrown = false;
    try {
        FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf());
        // Check existence first: listLocatedStatus throws
        // FileNotFoundException if the path does not exist.
        if (fs.exists(path)) {
            RemoteIterator<LocatedFileStatus> iterator = fs.listLocatedStatus(path);
            while (iterator.hasNext()) {
                LocatedFileStatus fileStatus = iterator.next();
                BlockLocation[] locations = fileStatus.getBlockLocations();
                for (BlockLocation loc : locations) {
                    // Touch the hosts and names so both lookups are
                    // exercised by this perf test.
                    loc.getHosts();
                    loc.getNames();
                }
            }
        }
    } catch (IOException e) {
        exceptionThrown = true;
        LOG.error("Failed to list Located Status", e);
    }
    assertFalse(exceptionThrown);
}

From source file: com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License: Apache License

private CompletableFuture<?> loadSplits() throws IOException {
    HiveFileIterator files = fileIterators.poll();
    if (files == null) {
        HivePartitionMetadata partition = partitions.poll();
        if (partition == null) {
            return COMPLETED_FUTURE;
        }
        loadPartition(partition);
        return COMPLETED_FUTURE;
    }

    while (files.hasNext() && !stopped) {
        LocatedFileStatus file = files.next();
        if (isDirectory(file)) {
            if (recursiveDirWalkerEnabled) {
                HiveFileIterator fileIterator = new HiveFileIterator(file.getPath(), files.getFileSystem(),
                        files.getDirectoryLister(), files.getNamenodeStats(), files.getPartitionName(),
                        files.getInputFormat(), files.getSchema(), files.getPartitionKeys(),
                        files.getEffectivePredicate(), files.getColumnCoercions());
                fileIterators.add(fileIterator);
            }
        } else {
            boolean splittable = isSplittable(files.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(files.getPartitionName(),
                    file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), files.getSchema(),
                    files.getPartitionKeys(), splittable, session, OptionalInt.empty(),
                    files.getEffectivePredicate(), files.getColumnCoercions()));
            if (!future.isDone()) {
                fileIterators.addFirst(files);
                return future;
            }
        }
    }

    // No need to put the iterator back, since it's either empty or we've stopped
    return COMPLETED_FUTURE;
}
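
Note the backpressure handling here: when the split queue cannot take more work (the returned future is not yet done), the partially consumed file iterator is pushed back onto the front of the deque, so the next call to loadSplits resumes with the same files once the queue drains.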

From source file: com.facebook.presto.hive.util.AsyncWalker.java

License: Apache License

private void doWalk(Path path, FileStatusCallback callback, AtomicLong taskCount, SettableFuture<Void> future) {
    try (SetThreadName ignored = new SetThreadName("HiveHdfsWalker")) {
        RemoteIterator<LocatedFileStatus> iterator = getLocatedFileStatusRemoteIterator(path);

        while (iterator.hasNext()) {
            LocatedFileStatus status = getLocatedFileStatus(iterator);

            // Ignore hidden files; Hive also skips files whose names start with _ or .
            String fileName = status.getPath().getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }
            if (!isDirectory(status)) {
                callback.process(status, status.getBlockLocations());
            } else if (recursive) {
                recursiveWalk(status.getPath(), callback, taskCount, future);
            }
            if (future.isDone()) {
                return;
            }
        }
    } catch (FileNotFoundException e) {
        future.setException(new FileNotFoundException("Partition location does not exist: " + path));
    } catch (Throwable t) {
        future.setException(t);
    } finally {
        if (taskCount.decrementAndGet() == 0) {
            future.set(null);
        }
    }
}
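
The taskCount/future pair acts as a simple completion latch: every walk task decrements the counter in its finally block, and whichever task brings it to zero completes the future. A FileNotFoundException is also translated into a clearer error naming the missing partition location.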

From source file: com.facebook.presto.hive.util.InternalHiveSplitFactory.java

License: Apache License

private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber,
        boolean splittable) {
    splittable = splittable && isSplittable(inputFormat, fileSystem, status.getPath());
    return createInternalHiveSplit(status.getPath(), status.getBlockLocations(), 0, status.getLen(),
            status.getLen(), bucketNumber, splittable);
}
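
Here the whole file becomes a single candidate split (offset 0 through status.getLen()), with its block locations attached so the engine can schedule the work close to the data; whether the file may later be broken into smaller splits is gated by the input format's splittability check.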

From source file: com.inclouds.hbase.utils.TableLocality.java

License: Open Source License

private static List<BlockLocation> getAllBlockLocations(Path r) throws FileNotFoundException, IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(r, true);
    List<BlockLocation> list = new ArrayList<BlockLocation>();

    while (it.hasNext()) {
        LocatedFileStatus st = it.next();
        BlockLocation[] locs = st.getBlockLocations();
        for (BlockLocation bl : locs) {
            list.add(bl);
        }
    }
    return list;
}
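
As a follow-up, here is one way such a list might be consumed, for example to count block replicas per host when assessing locality. This helper is illustrative and not part of the original class; it assumes the usual java.util.Map/HashMap imports, and keeps a throws clause because getHosts() declares IOException in some Hadoop versions.

private static Map<String, Integer> countReplicasPerHost(List<BlockLocation> blocks) throws IOException {
    Map<String, Integer> counts = new HashMap<>();
    for (BlockLocation bl : blocks) {
        // Each entry in getHosts() is one replica of this block.
        for (String host : bl.getHosts()) {
            counts.merge(host, 1, Integer::sum);
        }
    }
    return counts;
}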

From source file: org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.java

License: Apache License

@Override
public List<DataMapDistributable> toDistributable(Segment segment) {
    List<DataMapDistributable> distributables = new ArrayList<>();
    try {
        Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = getTableBlockIndexUniqueIdentifiers(
                segment);
        CarbonFile[] carbonIndexFiles = new CarbonFile[tableBlockIndexUniqueIdentifiers.size()];
        int identifierCounter = 0;
        for (TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier : tableBlockIndexUniqueIdentifiers) {
            String indexFilePath = tableBlockIndexUniqueIdentifier.getIndexFilePath();
            String fileName = tableBlockIndexUniqueIdentifier.getIndexFileName();
            carbonIndexFiles[identifierCounter++] = FileFactory
                    .getCarbonFile(indexFilePath + CarbonCommonConstants.FILE_SEPARATOR + fileName);
        }
        for (int i = 0; i < carbonIndexFiles.length; i++) {
            Path path = new Path(carbonIndexFiles[i].getPath());
            FileSystem fs = path.getFileSystem(FileFactory.getConfiguration());
            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
            LocatedFileStatus fileStatus = iter.next();
            String[] location = fileStatus.getBlockLocations()[0].getHosts();
            BlockletDataMapDistributable distributable = new BlockletDataMapDistributable(path.toString());
            distributable.setLocations(location);
            distributables.add(distributable);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return distributables;
}
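
Only the hosts of the first block are consulted when placing each distributable, presumably because a carbon index file is expected to fit in a single HDFS block. Note also that iter.next() is called without a hasNext() check, so the listing is assumed to contain at least the index file itself.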

From source file: org.apache.impala.catalog.HdfsTable.java

License: Apache License

/**
 * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose
 * location is under the given 'dirPath'. It involves the following steps:
 * - Clear the current block metadata of the partitions.
 * - Call FileSystem.listFiles() on 'dirPath' to fetch the BlockLocations for each
 *   file under it recursively.
 * - For every valid data file, map it to a partition from 'partsByPath' (if one exists)
 *   and enumerate all its blocks and their corresponding hosts and disk IDs.
 * Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification
 * (either fully qualified or unqualified), for isDescendantPath().
 * TODO: Split this method into more logical methods for cleaner code.
 */
private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) {
    try {
        FileSystem fs = dirPath.getFileSystem(CONF);
        // No need to load blocks for empty partitions list.
        if (partsByPath.size() == 0 || !fs.exists(dirPath))
            return;
        if (LOG.isTraceEnabled()) {
            LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString());
        }

        // Clear the state of partitions under dirPath since they are going to be updated
        // based on the current snapshot of files in the directory.
        List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath);
        if (dirPathPartitions != null) {
            // The dirPath is a partition directory. This means the path is the root of an
            // unpartitioned table, or the path of at least one partition.
            for (HdfsPartition partition : dirPathPartitions) {
                partition.setFileDescriptors(new ArrayList<FileDescriptor>());
            }
        } else {
            // The dirPath is not a partition directory. We expect it to be an ancestor of
            // partition paths (e.g., the table root). Clear all partitions whose paths are
            // a descendant of dirPath.
            for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) {
                Path partDir = entry.getKey();
                if (!FileSystemUtil.isDescendantPath(partDir, dirPath))
                    continue;
                for (HdfsPartition partition : entry.getValue()) {
                    partition.setFileDescriptors(new ArrayList<FileDescriptor>());
                }
            }
        }

        // For file systems that do not support BlockLocation API, we manually synthesize
        // block location metadata based on file formats.
        if (!FileSystemUtil.supportsStorageIds(fs)) {
            synthesizeBlockMetadata(fs, dirPath, partsByPath);
            return;
        }

        int unknownDiskIdCount = 0;
        RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true);
        while (fileStatusIter.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusIter.next();
            if (!FileSystemUtil.isValidDataFile(fileStatus))
                continue;
            // Find the partition that this file belongs to (if any).
            Path partPathDir = fileStatus.getPath().getParent();
            Preconditions.checkNotNull(partPathDir);

            List<HdfsPartition> partitions = partsByPath.get(partPathDir);
            // Skip if this file does not belong to any known partition.
            if (partitions == null) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond "
                            + " to a known partition. Skipping metadata load for this file.");
                }
                continue;
            }
            String fileName = fileStatus.getPath().getName();
            FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(),
                    fileStatus.getModificationTime());
            BlockLocation[] locations = fileStatus.getBlockLocations();
            String partPathDirName = partPathDir.toString();
            for (BlockLocation loc : locations) {
                Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts());
                // Enumerate all replicas of the block, adding any unknown hosts
                // to hostIndex_. We pick the network address from getNames() and
                // map it to the corresponding hostname from getHosts().
                List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length);
                for (int i = 0; i < loc.getNames().length; ++i) {
                    TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]);
                    replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress),
                            cachedHosts.contains(loc.getHosts()[i])));
                }
                FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas);
                THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift();
                fd.addThriftFileBlock(tHdfsFileBlock);
                unknownDiskIdCount += loadDiskIds(loc, tHdfsFileBlock);
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("Adding file md dir: " + partPathDirName + " file: " + fileName);
            }
            // Update the partitions' metadata that this file belongs to.
            for (HdfsPartition partition : partitions) {
                partition.getFileDescriptors().add(fd);
                numHdfsFiles_++;
                totalHdfsBytes_ += fd.getFileLength();
            }
        }
        if (unknownDiskIdCount > 0) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e);
    }
}
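
Two details are worth noting in this loader: replica network addresses from getNames() are interned through hostIndex_, so each file block stores compact host indexes instead of repeated address strings, and getCachedHosts() is consulted so every replica records whether it is HDFS-cached on that host.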