List of usage examples for org.apache.hadoop.fs.LocatedFileStatus.getBlockLocations()
public BlockLocation[] getBlockLocations()
From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java
License:Apache License
/** * List file status by calling fileSystem.listLocatedStatus. *///from w w w . j a va2s.c o m private static void listLocatedStatus(String dirPath) { Path path = new Path(dirPath); boolean exceptionThrown = false; try { FileSystem fs = path.getFileSystem(LoadMetadataUtil.getConf()); RemoteIterator<LocatedFileStatus> iterator = fs.listLocatedStatus(path); if (fs.exists(path)) { while (iterator.hasNext()) { LocatedFileStatus fileStatus = iterator.next(); BlockLocation[] locations = fileStatus.getBlockLocations(); for (BlockLocation loc : locations) { loc.getHosts(); loc.getNames(); } } } } catch (IOException e) { exceptionThrown = true; LOG.error("Failed to list Located Status", e); } assertFalse(exceptionThrown); }
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
private CompletableFuture<?> loadSplits() throws IOException { HiveFileIterator files = fileIterators.poll(); if (files == null) { HivePartitionMetadata partition = partitions.poll(); if (partition == null) { return COMPLETED_FUTURE; }//from w ww . ja v a 2 s. c o m loadPartition(partition); return COMPLETED_FUTURE; } while (files.hasNext() && !stopped) { LocatedFileStatus file = files.next(); if (isDirectory(file)) { if (recursiveDirWalkerEnabled) { HiveFileIterator fileIterator = new HiveFileIterator(file.getPath(), files.getFileSystem(), files.getDirectoryLister(), files.getNamenodeStats(), files.getPartitionName(), files.getInputFormat(), files.getSchema(), files.getPartitionKeys(), files.getEffectivePredicate(), files.getColumnCoercions()); fileIterators.add(fileIterator); } } else { boolean splittable = isSplittable(files.getInputFormat(), hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath()); CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(files.getPartitionName(), file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), files.getSchema(), files.getPartitionKeys(), splittable, session, OptionalInt.empty(), files.getEffectivePredicate(), files.getColumnCoercions())); if (!future.isDone()) { fileIterators.addFirst(files); return future; } } } // No need to put the iterator back, since it's either empty or we've stopped return COMPLETED_FUTURE; }
From source file:com.facebook.presto.hive.util.AsyncWalker.java
License:Apache License
private void doWalk(Path path, FileStatusCallback callback, AtomicLong taskCount, SettableFuture<Void> future) { try (SetThreadName ignored = new SetThreadName("HiveHdfsWalker")) { RemoteIterator<LocatedFileStatus> iterator = getLocatedFileStatusRemoteIterator(path); while (iterator.hasNext()) { LocatedFileStatus status = getLocatedFileStatus(iterator); // ignore hidden files. Hive ignores files starting with _ and . as well. String fileName = status.getPath().getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; }/*from w w w . jav a 2 s. co m*/ if (!isDirectory(status)) { callback.process(status, status.getBlockLocations()); } else if (recursive) { recursiveWalk(status.getPath(), callback, taskCount, future); } if (future.isDone()) { return; } } } catch (FileNotFoundException e) { future.setException(new FileNotFoundException("Partition location does not exist: " + path)); } catch (Throwable t) { future.setException(t); } finally { if (taskCount.decrementAndGet() == 0) { future.set(null); } } }
From source file:com.facebook.presto.hive.util.InternalHiveSplitFactory.java
License:Apache License
/**
 * Builds a split covering the whole file described by {@code status}.
 *
 * <p>The file is treated as splittable only when the caller allows it and
 * {@code isSplittable} also agrees for this input format, file system and path.
 * The file length is passed both as the split length and as the file size, with
 * a start offset of zero.
 *
 * @param status located status of the file
 * @param bucketNumber bucket number forwarded unchanged
 * @param splittable whether the caller permits splitting
 * @return the result of the delegate overload
 */
private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber, boolean splittable) {
    boolean canSplit = splittable && isSplittable(inputFormat, fileSystem, status.getPath());
    long fileLength = status.getLen();
    return createInternalHiveSplit(
            status.getPath(),
            status.getBlockLocations(),
            0,
            fileLength,
            fileLength,
            bucketNumber,
            canSplit);
}
From source file:com.inclouds.hbase.utils.TableLocality.java
License:Open Source License
/**
 * Collects the block locations of every file returned by
 * {@code fs.listFiles(r, true)} into a single list, in iteration order.
 *
 * @param r path to list
 * @return all block locations found; empty when the listing yields no files
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException declared by the original signature
 */
private static List<BlockLocation> getAllBlockLocations(Path r) throws FileNotFoundException, IOException {
    List<BlockLocation> collected = new ArrayList<BlockLocation>();
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(r, true);
    while (files.hasNext()) {
        // Append every block location of the current file.
        java.util.Collections.addAll(collected, files.next().getBlockLocations());
    }
    return collected;
}
From source file:org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.java
License:Apache License
@Override public List<DataMapDistributable> toDistributable(Segment segment) { List<DataMapDistributable> distributables = new ArrayList<>(); try {// ww w . jav a2 s. c o m Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = getTableBlockIndexUniqueIdentifiers( segment); CarbonFile[] carbonIndexFiles = new CarbonFile[tableBlockIndexUniqueIdentifiers.size()]; int identifierCounter = 0; for (TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier : tableBlockIndexUniqueIdentifiers) { String indexFilePath = tableBlockIndexUniqueIdentifier.getIndexFilePath(); String fileName = tableBlockIndexUniqueIdentifier.getIndexFileName(); carbonIndexFiles[identifierCounter++] = FileFactory .getCarbonFile(indexFilePath + CarbonCommonConstants.FILE_SEPARATOR + fileName); } for (int i = 0; i < carbonIndexFiles.length; i++) { Path path = new Path(carbonIndexFiles[i].getPath()); FileSystem fs = path.getFileSystem(FileFactory.getConfiguration()); RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path); LocatedFileStatus fileStatus = iter.next(); String[] location = fileStatus.getBlockLocations()[0].getHosts(); BlockletDataMapDistributable distributable = new BlockletDataMapDistributable(path.toString()); distributable.setLocations(location); distributables.add(distributable); } } catch (IOException e) { throw new RuntimeException(e); } return distributables; }
From source file:org.apache.impala.catalog.HdfsTable.java
License:Apache License
/** * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose * location is under the given 'dirPath'. It involves the following steps: * - Clear the current block metadata of the partitions. * - Call FileSystem.listStatus() on 'dirPath' to fetch the BlockLocations for each * file under it recursively./* w w w .j a v a 2 s .c om*/ * - For every valid data file, map it to a partition from 'partsByPath' (if one exists) * and enumerate all its blocks and their corresponding hosts and disk IDs. * Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification * (either fully qualified or unqualified), for isDescendantPath(). * TODO: Split this method into more logical methods for cleaner code. */ private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) { try { FileSystem fs = dirPath.getFileSystem(CONF); // No need to load blocks for empty partitions list. if (partsByPath.size() == 0 || !fs.exists(dirPath)) return; if (LOG.isTraceEnabled()) { LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString()); } // Clear the state of partitions under dirPath since they are going to be updated // based on the current snapshot of files in the directory. List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath); if (dirPathPartitions != null) { // The dirPath is a partition directory. This means the path is the root of an // unpartitioned table, or the path of at least one partition. for (HdfsPartition partition : dirPathPartitions) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); } } else { // The dirPath is not a partition directory. We expect it to be an ancestor of // partition paths (e.g., the table root). Clear all partitions whose paths are // a descendant of dirPath. 
for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) { Path partDir = entry.getKey(); if (!FileSystemUtil.isDescendantPath(partDir, dirPath)) continue; for (HdfsPartition partition : entry.getValue()) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); } } } // For file systems that do not support BlockLocation API, we manually synthesize // block location metadata based on file formats. if (!FileSystemUtil.supportsStorageIds(fs)) { synthesizeBlockMetadata(fs, dirPath, partsByPath); return; } int unknownDiskIdCount = 0; RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true); while (fileStatusIter.hasNext()) { LocatedFileStatus fileStatus = fileStatusIter.next(); if (!FileSystemUtil.isValidDataFile(fileStatus)) continue; // Find the partition that this file belongs (if any). Path partPathDir = fileStatus.getPath().getParent(); Preconditions.checkNotNull(partPathDir); List<HdfsPartition> partitions = partsByPath.get(partPathDir); // Skip if this file does not belong to any known partition. if (partitions == null) { if (LOG.isTraceEnabled()) { LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond " + " to a known partition. Skipping metadata load for this file."); } continue; } String fileName = fileStatus.getPath().getName(); FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime()); BlockLocation[] locations = fileStatus.getBlockLocations(); String partPathDirName = partPathDir.toString(); for (BlockLocation loc : locations) { Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts()); // Enumerate all replicas of the block, adding any unknown hosts // to hostIndex_. We pick the network address from getNames() and // map it to the corresponding hostname from getHosts(). 
List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length); for (int i = 0; i < loc.getNames().length; ++i) { TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]); replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress), cachedHosts.contains(loc.getHosts()[i]))); } FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas); THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift(); fd.addThriftFileBlock(tHdfsFileBlock); unknownDiskIdCount += loadDiskIds(loc, tHdfsFileBlock); } if (LOG.isTraceEnabled()) { LOG.trace("Adding file md dir: " + partPathDirName + " file: " + fileName); } // Update the partitions' metadata that this file belongs to. for (HdfsPartition partition : partitions) { partition.getFileDescriptors().add(fd); numHdfsFiles_++; totalHdfsBytes_ += fd.getFileLength(); } } if (unknownDiskIdCount > 0) { if (LOG.isWarnEnabled()) { LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount); } } } catch (IOException e) { throw new RuntimeException( "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e); } }