List of usage examples for org.apache.hadoop.fs.BlockLocation.getLength()
public long getLength()
From source file:RawParascaleFileSystem.java
License:Apache License
/** * {@inheritDoc}/* w w w .ja v a 2 s . c o m*/ */ @Override public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len) throws IOException { ChunkLocator newChunkLocator = null; if (file.getLen() < start + len) { throw new IOException("start+len must be less or equal than file length"); } final ArrayList<BlockLocation> locations = new ArrayList<BlockLocation>(); try { newChunkLocator = newChunkLocator(); final Path makeQualified = file.getPath().makeQualified(this.getUri(), this.getWorkingDirectory()); // sorted by offset final ChunkLocation[] chunkLocations = newChunkLocator.getChunkLocations(pathToFile(makeQualified), getVirtualFSFromPath(makeQualified, true)); long begin = start; long length = len; for (final ChunkLocation chunkLocation : chunkLocations) { final ChunkInfo chunkInfo = chunkLocation.getChunkInfo(); final StorageNodeInfo[] storageNodeInfo = chunkLocation.getStorageNodeInfo(); if (length <= 0) { // stop when length exceeded break; } if (begin < chunkInfo.getChunkOffset()) { // skip if location not reached yet continue; } final List<String> hosts = new ArrayList<String>(0); for (int j = 0; j < storageNodeInfo.length; j++) { // select all enabled and running nodes if (storageNodeInfo[j].isUp() && storageNodeInfo[j].isEnabled()) { hosts.add(storageNodeInfo[j].getNodeName()); } } final long lengthInChunk = chunkInfo.getChunkLength() - (begin - chunkInfo.getChunkOffset()); final BlockLocation blockLocation = new BlockLocation(null, hosts.toArray(new String[0]), begin, lengthInChunk < length ? lengthInChunk : length); begin += blockLocation.getLength(); length -= blockLocation.getLength(); locations.add(blockLocation); } if (pLog.isDebugEnabled()) { pLog.debug("Fetched " + locations.size() + " chunk locations for " + makeQualified); } return locations.toArray(new BlockLocation[0]); } catch (final ChunkStorageException e) { throw new IOException( "can not fetch chunk locations " + newChunkLocator == null ? 
"" : newChunkLocator.toString(), e); } finally { if (newChunkLocator != null) { newChunkLocator.close(); } } }
From source file:ch.cern.db.hdfs.Main.java
License:GNU General Public License
/**
 * Prints the metadata of one block to standard output: offset, length, one
 * entry per replica (host, disk, name, topology path) and the cached hosts.
 *
 * @param blockLocation block whose metadata is printed; volume ids are only
 *                      available when it is a {@code BlockStorageLocation}
 * @param dataDirs      data directories indexed by disk id, or {@code null}
 *                      when the directory of a disk id cannot be resolved
 * @throws IOException if the block location fails to report its hosts, names
 *                     or topology paths
 */
private void printBlockMetadata(BlockLocation blockLocation, String[] dataDirs) throws IOException {
    System.out.println(" Offset: " + blockLocation.getOffset());
    System.out.println(" Length: " + blockLocation.getLength());

    final String[] cachedHosts = blockLocation.getCachedHosts();
    final boolean hasCachedHosts = cachedHosts.length > 0;
    if (!hasCachedHosts) {
        System.out.println(" No cached hosts");
    }

    System.out.println(" Replicas:");
    VolumeId[] volumeIds = null;
    if (blockLocation instanceof BlockStorageLocation) {
        volumeIds = ((BlockStorageLocation) blockLocation).getVolumeIds();
    }
    final String[] hosts = blockLocation.getHosts();
    final String[] names = blockLocation.getNames();
    final String[] topologyPaths = blockLocation.getTopologyPaths();

    for (int replica = 0; replica < topologyPaths.length; replica++) {
        // -1 marks a replica whose disk cannot be determined (no volume ids available).
        final int diskId = (volumeIds == null)
                ? -1
                : DistributedFileSystemMetadata.getDiskId(volumeIds[replica]);

        System.out.println(" Replica (" + replica + "):");
        System.out.println(" Host: " + hosts[replica]);

        final String diskLine;
        if (diskId == -1) {
            diskLine = " DiskId: unknown";
        } else if (dataDirs != null && diskId < dataDirs.length) {
            diskLine = " Location: " + dataDirs[diskId] + " (DiskId: " + diskId + ")";
        } else {
            diskLine = " DiskId: " + diskId;
        }
        System.out.println(diskLine);

        System.out.println(" Name: " + names[replica]);
        System.out.println(" TopologyPaths: " + topologyPaths[replica]);
    }

    if (hasCachedHosts) {
        System.out.println(" Cached hosts:");
        for (int c = 0; c < cachedHosts.length; c++) {
            System.out.println(" Host: " + cachedHosts[c]);
        }
    }
}
From source file:co.cask.cdap.data.stream.StreamDataFileSplitter.java
License:Apache License
/** * Returns the array index of the given blockLocations that contains the given offset. * * @param blockLocations Array of {@link BlockLocation} to search for. * @param offset File offset./*from w ww .ja va2 s. co m*/ * @param startIdx Starting index for the search in the array. * @return The array index of the {@link BlockLocation} that contains the given offset. */ private int getBlockIndex(BlockLocation[] blockLocations, long offset, int startIdx) { if (blockLocations == null) { return -1; } for (int i = startIdx; i < blockLocations.length; i++) { BlockLocation blockLocation = blockLocations[i]; long endOffset = blockLocation.getOffset() + blockLocation.getLength(); if (blockLocation.getOffset() <= offset && offset < endOffset) { return i; } } return -1; }
From source file:com.asakusafw.runtime.directio.hadoop.BlockMap.java
License:Apache License
/**
 * Computes one {@link BlockInfo} per block location of the target file.
 * Each entry carries the block's start offset, its end offset
 * (start + length) and the hosts reported for the block.
 *
 * @param fs the file system used to look up the block locations
 * @param status the target file status
 * @return the computed information, in the order the locations were reported
 * @throws IOException if failed to compute information
 */
public static List<BlockInfo> computeBlocks(FileSystem fs, FileStatus status) throws IOException {
    final long fileLength = status.getLen();
    final BlockLocation[] locations = fs.getFileBlockLocations(status, 0, fileLength);

    final List<BlockInfo> results = new ArrayList<>();
    for (int i = 0; i < locations.length; i++) {
        final BlockLocation location = locations[i];
        final long blockStart = location.getOffset();
        final long blockEnd = blockStart + location.getLength();
        results.add(new BlockInfo(blockStart, blockEnd, location.getHosts()));
    }
    return results;
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
protected int getBlockIndex(BlockLocation[] blkLocations, long offset) { for (int i = 0; i < blkLocations.length; i++) { // is the offset inside this block? if ((blkLocations[i].getOffset() <= offset) && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) { return i; }//from w w w. ja v a 2 s . c o m } BlockLocation last = blkLocations[blkLocations.length - 1]; long fileLength = last.getOffset() + last.getLength() - 1; throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")"); }
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/** * @param blkLocations// w w w . j a va2 s.com * The locations of the current blocks * @param offset * The start offset of file associated with this block * @return The block index */ protected int getBlockIndex(BlockLocation[] blkLocations, long offset) { for (int i = 0; i < blkLocations.length; i++) { // is the offset inside this block? if ((blkLocations[i].getOffset() <= offset) && (offset < blkLocations[i].getOffset() + blkLocations[i].getLength())) { return i; } } BlockLocation last = blkLocations[blkLocations.length - 1]; long fileLength = last.getOffset() + last.getLength() - 1; throw new IllegalArgumentException("Offset " + offset + " is outside of file (0.." + fileLength + ")"); }
From source file:com.cloudera.GetBlockLocations.java
License:Apache License
/**
 * Looks up the path configured under {@code get.block.locations.path} and
 * prints every block location of that file to standard output.
 * Exits with status 1 when the file does not exist or the block locations
 * cannot be fetched.
 *
 * @param args command-line arguments (not read by this method)
 * @throws Exception if the path is not a valid URI or the file system cannot be reached
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String url = getStringOrDie("get.block.locations.path");
    final FileSystem fs = FileSystem.get(new URI(url), conf);
    final Path target = new Path(url);

    if (!fs.exists(target)) {
        System.out.println("no file at " + url);
        System.exit(1);
    }

    BlockLocation[] locs = null;
    try {
        locs = fs.getFileBlockLocations(target, 0, Long.MAX_VALUE);
    } catch (IOException e) {
        System.out.println("Error calling getFileBlockLocations(" + url + ")\n");
        e.printStackTrace(System.err);
        System.exit(1);
    }

    for (int i = 0; i < locs.length; i++) {
        final BlockLocation loc = locs[i];
        // The first entry is preceded by an empty line, every later one by a comma line.
        System.out.println(i == 0 ? "" : ",");
        System.out.println("{");
        System.out.println(" hosts = " + Arrays.toString(loc.getHosts()));
        System.out.println(" cachedHosts = " + Arrays.toString(loc.getCachedHosts()));
        System.out.println(" names = " + Arrays.toString(loc.getNames()));
        System.out.println(" topologyPaths = " + Arrays.toString(loc.getTopologyPaths()));
        System.out.println(" offset = " + loc.getOffset());
        System.out.println(" length = " + loc.getLength());
        System.out.println(" corrupt = " + loc.isCorrupt());
        System.out.println("}");
    }
}
From source file:com.cloudera.impala.catalog.HdfsTable.java
License:Apache License
/** * Loads the file block metadata for the given collection of FileDescriptors. The * FileDescriptors are passed as a tree, where the first level is indexed by * filesystem, the second level is indexed by partition location, and the leaves are * the list of files that exist under each directory. *//* w w w .j a va 2 s . com*/ private void loadBlockMd(Map<FsKey, Map<String, List<FileDescriptor>>> perFsFileDescs) throws RuntimeException { Preconditions.checkNotNull(perFsFileDescs); LOG.debug("load block md for " + name_); for (FsKey fsEntry : perFsFileDescs.keySet()) { FileSystem fs = fsEntry.filesystem; // Store all BlockLocations so they can be reused when loading the disk IDs. List<BlockLocation> blockLocations = Lists.newArrayList(); int numCachedBlocks = 0; Map<String, List<FileDescriptor>> partitionToFds = perFsFileDescs.get(fsEntry); Preconditions.checkNotNull(partitionToFds); // loop over all files and record their block metadata, minus volume ids for (String partitionDir : partitionToFds.keySet()) { Path partDirPath = new Path(partitionDir); for (FileDescriptor fileDescriptor : partitionToFds.get(partitionDir)) { Path p = new Path(partDirPath, fileDescriptor.getFileName()); try { FileStatus fileStatus = fs.getFileStatus(p); // fileDescriptors should not contain directories. Preconditions.checkArgument(!fileStatus.isDirectory()); BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen()); Preconditions.checkNotNull(locations); blockLocations.addAll(Arrays.asList(locations)); // Loop over all blocks in the file. 
for (BlockLocation block : locations) { String[] blockHostPorts = block.getNames(); try { blockHostPorts = block.getNames(); } catch (IOException e) { // this shouldn't happen, getNames() doesn't throw anything String errorMsg = "BlockLocation.getNames() failed:\n" + e.getMessage(); LOG.error(errorMsg); throw new IllegalStateException(errorMsg); } // Now enumerate all replicas of the block, adding any unknown hosts to // hostIndex_ and the index for that host to replicaHostIdxs. List<Integer> replicaHostIdxs = new ArrayList<Integer>(blockHostPorts.length); for (int i = 0; i < blockHostPorts.length; ++i) { String[] ip_port = blockHostPorts[i].split(":"); Preconditions.checkState(ip_port.length == 2); TNetworkAddress network_address = new TNetworkAddress(ip_port[0], Integer.parseInt(ip_port[1])); replicaHostIdxs.add(hostIndex_.getIndex(network_address)); } fileDescriptor.addFileBlock( new FileBlock(block.getOffset(), block.getLength(), replicaHostIdxs)); } } catch (IOException e) { throw new RuntimeException( "couldn't determine block locations for path '" + p + "':\n" + e.getMessage(), e); } } } if (SUPPORTS_VOLUME_ID && fs instanceof DistributedFileSystem) { LOG.trace("loading disk ids for: " + getFullName() + ". nodes: " + getNumNodes() + ". file system: " + fsEntry); loadDiskIds((DistributedFileSystem) fs, blockLocations, partitionToFds); LOG.trace("completed load of disk ids for: " + getFullName()); } } }
From source file:com.cloudera.impala.util.LoadMetadataUtil.java
License:Apache License
/** * Create FileBlock according to BlockLocation and hostIndex. Get host names and ports * from BlockLocation, and get all replicas' host id from hostIndex. * * Must be threadsafe. Access to 'hostIndex' must be protected. */// w w w . j a v a2 s .co m private static FileBlock createFileBlock(BlockLocation loc, ListMap<TNetworkAddress> hostIndex) throws IOException { // Get the location of all block replicas in ip:port format. String[] blockHostPorts = loc.getNames(); // Get the hostnames for all block replicas. Used to resolve which hosts // contain cached data. The results are returned in the same order as // block.getNames() so it allows us to match a host specified as ip:port to // corresponding hostname using the same array index. String[] blockHostNames = loc.getHosts(); Preconditions.checkState(blockHostNames.length == blockHostPorts.length); // Get the hostnames that contain cached replicas of this block. Set<String> cachedHosts = Sets.newHashSet(Arrays.asList(loc.getCachedHosts())); Preconditions.checkState(cachedHosts.size() <= blockHostNames.length); // Now enumerate all replicas of the block, adding any unknown hosts // to hostMap_/hostList_. The host ID (index in to the hostList_) for each // replica is stored in replicaHostIdxs. List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(blockHostPorts.length); for (int i = 0; i < blockHostPorts.length; ++i) { TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]); Preconditions.checkState(networkAddress != null); networkAddress.setHdfs_host_name(blockHostNames[i]); int idx = -1; synchronized (hostIndex) { idx = hostIndex.getIndex(networkAddress); } replicas.add(new BlockReplica(idx, cachedHosts.contains(blockHostNames[i]))); } return new FileBlock(loc.getOffset(), loc.getLength(), replicas); }
From source file:com.cloudera.kitten.appmaster.util.HDFSFileFinder.java
License:Open Source License
/**
 * Sums, per datanode, the bytes of all blocks belonging to the files matched
 * by a path or glob pattern. A block is counted once for every name returned
 * by {@code BlockLocation.getNames()}, i.e. once per reported replica.
 *
 * @param p    path or glob pattern selecting the files to inspect
 * @param conf configuration used to resolve the file system of {@code p}
 * @return map from datanode name to the number of block bytes it holds;
 *         empty when nothing matches
 * @throws IOException if the file system cannot be reached or queried
 */
public static Map<String, Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf)
        throws IOException {
    FileSystem fs = p.getFileSystem(conf);
    HashMap<String, Long> bytesHeld = Maps.newHashMap();

    FileStatus[] matches = fs.globStatus(p);
    if (matches == null) {
        // globStatus may report "no match" as null rather than an empty array.
        return bytesHeld;
    }

    for (FileStatus f : matches) {
        // Query the matched file itself; the glob pattern 'p' is not a real file path.
        BlockLocation[] bls = fs.getFileBlockLocations(f, 0, f.getLen());
        if (bls == null) {
            continue;
        }
        for (BlockLocation bl : bls) {
            long l = bl.getLength();
            for (String name : bl.getNames()) {
                Long held = bytesHeld.get(name);
                bytesHeld.put(name, held == null ? l : held + l);
            }
        }
    }
    return bytesHeld;
}