List of usage examples for org.apache.hadoop.fs BlockLocation getCachedHosts
public String[] getCachedHosts()
From source file:ch.cern.db.hdfs.Main.java
License:GNU General Public License
/**
 * Prints a description of a single block to standard output: its offset and length,
 * one entry per replica (host, disk information, name and topology path) and, when
 * present, the hosts that hold a cached copy.
 *
 * @param blockLocation the block to describe; volume ids are only read when it is a
 *                      {@code BlockStorageLocation}
 * @param dataDirs      data directories indexed by disk id; may be {@code null}, in
 *                      which case only the numeric disk id is printed
 * @throws IOException if the block location accessors fail
 */
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {
    System.out.println(" Offset: " + blockLocation.getOffset());
    System.out.println(" Length: " + blockLocation.getLength());

    String[] cachedHosts = blockLocation.getCachedHosts();
    boolean anyCached = cachedHosts.length > 0;
    if (!anyCached) {
        System.out.println(" No cached hosts");
    }

    System.out.println(" Replicas:");

    // Volume ids are only available on the storage-aware subclass.
    VolumeId[] volumeIds = null;
    if (blockLocation instanceof BlockStorageLocation) {
        volumeIds = ((BlockStorageLocation) blockLocation).getVolumeIds();
    }

    String[] hosts = blockLocation.getHosts();
    String[] names = blockLocation.getNames();
    String[] topologyPaths = blockLocation.getTopologyPaths();

    // The topology path array drives the replica count; the other arrays are read
    // at the same index.
    for (int replica = 0; replica < topologyPaths.length; replica++) {
        int diskId = -1;
        if (volumeIds != null) {
            diskId = DistributedFileSystemMetadata.getDiskId(volumeIds[replica]);
        }

        System.out.println(" Replica (" + replica + "):");
        System.out.println(" Host: " + hosts[replica]);

        String diskLine;
        if (diskId == -1) {
            diskLine = " DiskId: unknown";
        } else if (dataDirs == null || diskId >= dataDirs.length) {
            // No directory known for this disk id: fall back to the bare id.
            diskLine = " DiskId: " + diskId;
        } else {
            diskLine = " Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")";
        }
        System.out.println(diskLine);

        System.out.println(" Name: " + names[replica]);
        System.out.println(" TopologyPaths: " + topologyPaths[replica]);
    }

    if (anyCached) {
        System.out.println(" Cached hosts:");
        for (int c = 0; c < cachedHosts.length; c++) {
            System.out.println(" Host: " + cachedHosts[c]);
        }
    }
}
From source file:com.cloudera.GetBlockLocations.java
License:Apache License
public static void main(String[] args) throws Exception { final Configuration conf = new Configuration(); String url = getStringOrDie("get.block.locations.path"); final FileSystem fs = FileSystem.get(new URI(url), conf); if (!fs.exists(new Path(url))) { System.out.println("no file at " + url); System.exit(1);//w ww.java 2 s. co m } BlockLocation locs[] = null; try { locs = fs.getFileBlockLocations(new Path(url), 0, Long.MAX_VALUE); } catch (IOException e) { System.out.println("Error calling getFileBlockLocations(" + url + ")\n"); e.printStackTrace(System.err); System.exit(1); } String prefix = ""; for (BlockLocation loc : locs) { System.out.println(prefix); System.out.println("{"); System.out.println(" hosts = " + Arrays.toString(loc.getHosts())); System.out.println(" cachedHosts = " + Arrays.toString(loc.getCachedHosts())); System.out.println(" names = " + Arrays.toString(loc.getNames())); System.out.println(" topologyPaths = " + Arrays.toString(loc.getTopologyPaths())); System.out.println(" offset = " + loc.getOffset()); System.out.println(" length = " + loc.getLength()); System.out.println(" corrupt = " + loc.isCorrupt()); System.out.println("}"); prefix = ","; } }
From source file:com.cloudera.impala.util.LoadMetadataUtil.java
License:Apache License
/** * Create FileBlock according to BlockLocation and hostIndex. Get host names and ports * from BlockLocation, and get all replicas' host id from hostIndex. * * Must be threadsafe. Access to 'hostIndex' must be protected. *//*from w w w . java2 s . c o m*/ private static FileBlock createFileBlock(BlockLocation loc, ListMap<TNetworkAddress> hostIndex) throws IOException { // Get the location of all block replicas in ip:port format. String[] blockHostPorts = loc.getNames(); // Get the hostnames for all block replicas. Used to resolve which hosts // contain cached data. The results are returned in the same order as // block.getNames() so it allows us to match a host specified as ip:port to // corresponding hostname using the same array index. String[] blockHostNames = loc.getHosts(); Preconditions.checkState(blockHostNames.length == blockHostPorts.length); // Get the hostnames that contain cached replicas of this block. Set<String> cachedHosts = Sets.newHashSet(Arrays.asList(loc.getCachedHosts())); Preconditions.checkState(cachedHosts.size() <= blockHostNames.length); // Now enumerate all replicas of the block, adding any unknown hosts // to hostMap_/hostList_. The host ID (index in to the hostList_) for each // replica is stored in replicaHostIdxs. List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(blockHostPorts.length); for (int i = 0; i < blockHostPorts.length; ++i) { TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]); Preconditions.checkState(networkAddress != null); networkAddress.setHdfs_host_name(blockHostNames[i]); int idx = -1; synchronized (hostIndex) { idx = hostIndex.getIndex(networkAddress); } replicas.add(new BlockReplica(idx, cachedHosts.contains(blockHostNames[i]))); } return new FileBlock(loc.getOffset(), loc.getLength(), replicas); }
From source file:org.apache.impala.catalog.HdfsTable.java
License:Apache License
/** * Drops and re-loads the block metadata for all partitions in 'partsByPath' whose * location is under the given 'dirPath'. It involves the following steps: * - Clear the current block metadata of the partitions. * - Call FileSystem.listStatus() on 'dirPath' to fetch the BlockLocations for each * file under it recursively./* w w w. java 2 s .c o m*/ * - For every valid data file, map it to a partition from 'partsByPath' (if one exists) * and enumerate all its blocks and their corresponding hosts and disk IDs. * Requires that 'dirPath' and all paths in 'partsByPath' have consistent qualification * (either fully qualified or unqualified), for isDescendantPath(). * TODO: Split this method into more logical methods for cleaner code. */ private void loadBlockMetadata(Path dirPath, HashMap<Path, List<HdfsPartition>> partsByPath) { try { FileSystem fs = dirPath.getFileSystem(CONF); // No need to load blocks for empty partitions list. if (partsByPath.size() == 0 || !fs.exists(dirPath)) return; if (LOG.isTraceEnabled()) { LOG.trace("Loading block md for " + name_ + " directory " + dirPath.toString()); } // Clear the state of partitions under dirPath since they are going to be updated // based on the current snapshot of files in the directory. List<HdfsPartition> dirPathPartitions = partsByPath.get(dirPath); if (dirPathPartitions != null) { // The dirPath is a partition directory. This means the path is the root of an // unpartitioned table, or the path of at least one partition. for (HdfsPartition partition : dirPathPartitions) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); } } else { // The dirPath is not a partition directory. We expect it to be an ancestor of // partition paths (e.g., the table root). Clear all partitions whose paths are // a descendant of dirPath. 
for (Map.Entry<Path, List<HdfsPartition>> entry : partsByPath.entrySet()) { Path partDir = entry.getKey(); if (!FileSystemUtil.isDescendantPath(partDir, dirPath)) continue; for (HdfsPartition partition : entry.getValue()) { partition.setFileDescriptors(new ArrayList<FileDescriptor>()); } } } // For file systems that do not support BlockLocation API, we manually synthesize // block location metadata based on file formats. if (!FileSystemUtil.supportsStorageIds(fs)) { synthesizeBlockMetadata(fs, dirPath, partsByPath); return; } int unknownDiskIdCount = 0; RemoteIterator<LocatedFileStatus> fileStatusIter = fs.listFiles(dirPath, true); while (fileStatusIter.hasNext()) { LocatedFileStatus fileStatus = fileStatusIter.next(); if (!FileSystemUtil.isValidDataFile(fileStatus)) continue; // Find the partition that this file belongs (if any). Path partPathDir = fileStatus.getPath().getParent(); Preconditions.checkNotNull(partPathDir); List<HdfsPartition> partitions = partsByPath.get(partPathDir); // Skip if this file does not belong to any known partition. if (partitions == null) { if (LOG.isTraceEnabled()) { LOG.trace("File " + fileStatus.getPath().toString() + " doesn't correspond " + " to a known partition. Skipping metadata load for this file."); } continue; } String fileName = fileStatus.getPath().getName(); FileDescriptor fd = new FileDescriptor(fileName, fileStatus.getLen(), fileStatus.getModificationTime()); BlockLocation[] locations = fileStatus.getBlockLocations(); String partPathDirName = partPathDir.toString(); for (BlockLocation loc : locations) { Set<String> cachedHosts = Sets.newHashSet(loc.getCachedHosts()); // Enumerate all replicas of the block, adding any unknown hosts // to hostIndex_. We pick the network address from getNames() and // map it to the corresponding hostname from getHosts(). 
List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(loc.getNames().length); for (int i = 0; i < loc.getNames().length; ++i) { TNetworkAddress networkAddress = BlockReplica.parseLocation(loc.getNames()[i]); replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress), cachedHosts.contains(loc.getHosts()[i]))); } FileBlock currentBlock = new FileBlock(loc.getOffset(), loc.getLength(), replicas); THdfsFileBlock tHdfsFileBlock = currentBlock.toThrift(); fd.addThriftFileBlock(tHdfsFileBlock); unknownDiskIdCount += loadDiskIds(loc, tHdfsFileBlock); } if (LOG.isTraceEnabled()) { LOG.trace("Adding file md dir: " + partPathDirName + " file: " + fileName); } // Update the partitions' metadata that this file belongs to. for (HdfsPartition partition : partitions) { partition.getFileDescriptors().add(fd); numHdfsFiles_++; totalHdfsBytes_ += fd.getFileLength(); } } if (unknownDiskIdCount > 0) { if (LOG.isWarnEnabled()) { LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount); } } } catch (IOException e) { throw new RuntimeException( "Error loading block metadata for directory " + dirPath.toString() + ": " + e.getMessage(), e); } }