List of usage examples for org.apache.hadoop.hdfs.protocol LocatedBlock getLocations
public DatanodeInfo[] getLocations()
From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java
License:Apache License
private void addExtendedBlocksFromNameNode(BackupReportWriter writer, ExternalExtendedBlockSort<Addresses> nameNodeBlocks, DFSClient client, FileStatus fs, Set<Path> pathSetToIgnore) throws IOException { Path qualifiedPath = fileSystem.makeQualified(fs.getPath()); if (shouldIgnore(pathSetToIgnore, qualifiedPath)) { return;//from w ww.ja va 2 s. c o m } String src = qualifiedPath.toUri().getPath(); long start = 0; long length = fs.getLen(); LocatedBlocks locatedBlocks = client.getLocatedBlocks(src, start, length); for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) { DatanodeInfo[] locations = locatedBlock.getLocations(); ExtendedBlock extendedBlock = BackupUtil.fromHadoop(locatedBlock.getBlock()); Addresses addresses = new Addresses(locations); nameNodeBlocks.add(extendedBlock, addresses); writer.statusExtendedBlocksFromNameNode(src, extendedBlock, locations); } }
From source file:com.bigstep.datalake.JsonUtil.java
License:Apache License
/** Convert a LocatedBlock to a Json map. */ private static Map<String, Object> toJsonMap(final LocatedBlock locatedblock) throws IOException { if (locatedblock == null) { return null; }//w w w . j ava 2s.co m final Map<String, Object> m = new TreeMap<String, Object>(); m.put("blockToken", toJsonMap(locatedblock.getBlockToken())); m.put("isCorrupt", locatedblock.isCorrupt()); m.put("startOffset", locatedblock.getStartOffset()); m.put("block", toJsonMap(locatedblock.getBlock())); m.put("locations", toJsonArray(locatedblock.getLocations())); m.put("cachedLocations", toJsonArray(locatedblock.getCachedLocations())); return m; }
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/** * Get block location information about a list of {@link HdfsBlockLocation}. * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to * get {@link BlockStorageLocation}s for blocks returned by * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)} . * /* w w w . j a v a 2 s . c o m*/ * This is done by making a round of RPCs to the associated datanodes, asking * the volume of each block replica. The returned array of {@link BlockStorageLocation} expose this information as a {@link VolumeId}. * * @param blockLocations * target blocks on which to query volume location information * @return volumeBlockLocations original block array augmented with additional * volume location information for each replica. */ public BlockStorageLocation[] getBlockStorageLocations(List<BlockLocation> blockLocations) throws IOException, UnsupportedOperationException, InvalidBlockTokenException { if (!getConf().isHdfsBlocksMetadataEnabled()) { throw new UnsupportedOperationException("Datanode-side support for " + "getVolumeBlockLocations() must also be enabled in the client " + "configuration."); } // Downcast blockLocations and fetch out required LocatedBlock(s) List<LocatedBlock> blocks = new ArrayList<LocatedBlock>(); for (BlockLocation loc : blockLocations) { if (!(loc instanceof HdfsBlockLocation)) { throw new ClassCastException( "DFSClient#getVolumeBlockLocations " + "expected to be passed HdfsBlockLocations"); } HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc; blocks.add(hdfsLoc.getLocatedBlock()); } // Re-group the LocatedBlocks to be grouped by datanodes, with the values // a list of the LocatedBlocks on the datanode. 
Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks = new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>(); for (LocatedBlock b : blocks) { for (DatanodeInfo info : b.getLocations()) { if (!datanodeBlocks.containsKey(info)) { datanodeBlocks.put(info, new ArrayList<LocatedBlock>()); } List<LocatedBlock> l = datanodeBlocks.get(info); l.add(b); } } // Make RPCs to the datanodes to get volume locations for its replicas TraceScope scope = Trace.startSpan("getBlockStorageLocations", traceSampler); Map<DatanodeInfo, HdfsBlocksMetadata> metadatas; try { metadatas = BlockStorageLocationUtilBridge.queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks, getConf().getFileBlockStorageLocationsNumThreads(), getConf().getFileBlockStorageLocationsTimeoutMs(), getConf().getConnectToDnViaHostname()); if (LOG.isTraceEnabled()) { LOG.trace("metadata returned: " + Joiner.on("\n").withKeyValueSeparator("=").join(metadatas)); } } finally { scope.close(); } // Regroup the returned VolumeId metadata to again be grouped by // LocatedBlock rather than by datanode Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtilBridge .associateVolumeIdsWithBlocks(blocks, metadatas); // Combine original BlockLocations with new VolumeId information BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtilBridge .convertToVolumeBlockLocations(blocks, blockVolumeIds); return volumeBlockLocations; }
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/** * Get the checksum of the whole file of a range of the file. Note that the * range always starts from the beginning of the file. * // ww w . j a v a2s . c om * @param src * The file path * @param length * the length of the range, i.e., the range is [0, length] * @return The checksum * @see DistributedFileSystem#getFileChecksum(Path) */ public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException { checkOpen(); Preconditions.checkArgument(length >= 0); // get block locations for the file range LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length); if (null == blockLocations) { throw new FileNotFoundException("File does not exist: " + src); } List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks(); final DataOutputBuffer md5out = new DataOutputBuffer(); int bytesPerCRC = -1; DataChecksum.Type crcType = DataChecksum.Type.DEFAULT; long crcPerBlock = 0; boolean refetchBlocks = false; int lastRetriedIndex = -1; // get block checksum for each block long remaining = length; if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) { remaining = Math.min(length, blockLocations.getFileLength()); } for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) { if (refetchBlocks) { // refetch to get fresh tokens blockLocations = callGetBlockLocations(namenode, src, 0, length); if (null == blockLocations) { throw new FileNotFoundException("File does not exist: " + src); } locatedblocks = blockLocations.getLocatedBlocks(); refetchBlocks = false; } LocatedBlock lb = locatedblocks.get(i); final ExtendedBlock block = lb.getBlock(); if (remaining < block.getNumBytes()) { block.setNumBytes(remaining); } remaining -= block.getNumBytes(); final DatanodeInfo[] datanodes = lb.getLocations(); // try each datanode location of the block final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout(); boolean done = false; for (int j = 0; !done && j < datanodes.length; j++) { DataOutputStream 
out = null; DataInputStream in = null; try { // connect to a datanode IOStreamPair pair = connectToDN(datanodes[j], timeout, lb); out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE)); in = new DataInputStream(pair.in); if (LOG.isDebugEnabled()) { LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block); } // get block MD5 new Sender(out).blockChecksum(block, lb.getBlockToken()); final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in)); String logInfo = "for block " + block + " from datanode " + datanodes[j]; DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo); OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse(); // read byte-per-checksum final int bpc = checksumData.getBytesPerCrc(); if (i == 0) { // first block bytesPerCRC = bpc; } else if (bpc != bytesPerCRC) { throw new IOException( "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC); } // read crc-per-block final long cpb = checksumData.getCrcPerBlock(); if (locatedblocks.size() > 1 && i == 0) { crcPerBlock = cpb; } // read md5 final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray()); md5.write(md5out); // read crc-type final DataChecksum.Type ct; if (checksumData.hasCrcType()) { ct = PBHelper.convert(checksumData.getCrcType()); } else { LOG.debug("Retrieving checksum from an earlier-version DataNode: " + "inferring checksum by reading first byte"); ct = inferChecksumTypeByReading(lb, datanodes[j]); } if (i == 0) { // first block crcType = ct; } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) { // if crc types are mixed in a file crcType = DataChecksum.Type.MIXED; } done = true; if (LOG.isDebugEnabled()) { if (i == 0) { LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock); } LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5); } } catch (InvalidBlockTokenException ibte) { if (i > 
lastRetriedIndex) { if (LOG.isDebugEnabled()) { LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src + " for block " + block + " from datanode " + datanodes[j] + ". Will retry the block once."); } lastRetriedIndex = i; done = true; // actually it's not done; but we'll retry i--; // repeat at i-th block refetchBlocks = true; break; } } catch (IOException ie) { LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie); } finally { IOUtils.closeStream(in); IOUtils.closeStream(out); } } if (!done) { throw new IOException("Fail to get block MD5 for " + block); } } // compute file MD5 final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData()); switch (crcType) { case CRC32: return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5); case CRC32C: return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5); default: // If there is no block allocated for the file, // return one with the magic entry that matches what previous // hdfs versions return. if (locatedblocks.size() == 0) { return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5); } // we should never get here since the validity was checked // when getCrcType() was called above. return null; } }
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException { final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0); if (DFSClient.LOG.isDebugEnabled()) { DFSClient.LOG.debug("newInfo = " + newInfo); }/* w w w . j a v a 2s . c om*/ if (newInfo == null) { throw new IOException("Cannot open filename " + src); } if (locatedBlocks != null) { Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator(); Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator(); while (oldIter.hasNext() && newIter.hasNext()) { if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) { throw new IOException("Blocklist for " + src + " has changed!"); } } } locatedBlocks = newInfo; long lastBlockBeingWrittenLength = 0; if (!locatedBlocks.isLastBlockComplete()) { final LocatedBlock last = locatedBlocks.getLastLocatedBlock(); if (last != null) { if (last.getLocations().length == 0) { if (last.getBlockSize() == 0) { // if the length is zero, then no data has been written to // datanode. So no need to wait for the locations. return 0; } return -1; } final long len = readBlockLength(last); last.getBlock().setNumBytes(len); lastBlockBeingWrittenLength = len; } } fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo(); return lastBlockBeingWrittenLength; }
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
/** Read the block length from one of the datanodes. */ private long readBlockLength(LocatedBlock locatedblock) throws IOException { assert locatedblock != null : "LocatedBlock cannot be null"; int replicaNotFoundCount = locatedblock.getLocations().length; for (DatanodeInfo datanode : locatedblock.getLocations()) { ClientDatanodeProtocol cdp = null; try {/*from ww w. j av a2s .c om*/ cdp = DFSUtil.createClientDatanodeProtocolProxy(datanode, dfsClient.getConfiguration(), dfsClient.getConf().getSocketTimeout(), dfsClient.getConf().getConnectToDnViaHostname(), locatedblock); final long n = cdp.getReplicaVisibleLength(locatedblock.getBlock()); if (n >= 0) { return n; } } catch (IOException ioe) { if (ioe instanceof RemoteException && (((RemoteException) ioe).unwrapRemoteException() instanceof ReplicaNotFoundException)) { // special case : replica might not be on the DN, treat as 0 length replicaNotFoundCount--; } if (DFSClient.LOG.isDebugEnabled()) { DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode " + datanode + " for block " + locatedblock.getBlock(), ioe); } } finally { if (cdp != null) { RPC.stopProxy(cdp); } } } // Namenode told us about these locations, but none know about the replica // means that we hit the race between pipeline creation start and end. // we require all 3 because some other exception could have happened // on a DN that has it. we want to report that error if (replicaNotFoundCount == 0) { return 0; } throw new IOException("Cannot obtain block length for " + locatedblock); }
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
private DNAddrPair chooseDataNode(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes) throws IOException { while (true) { try {/*from w w w . ja v a2 s .c om*/ return getBestNodeDNAddrPair(block, ignoredNodes); } catch (IOException ie) { String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), deadNodes, ignoredNodes); String blockInfo = block.getBlock() + " file=" + src; if (failures >= dfsClient.getMaxBlockAcquireFailures()) { String description = "Could not obtain block: " + blockInfo; DFSClient.LOG.warn(description + errMsg + ". Throwing a BlockMissingException"); throw new BlockMissingException(src, description, block.getStartOffset()); } DatanodeInfo[] nodes = block.getLocations(); if (nodes == null || nodes.length == 0) { DFSClient.LOG.info("No node available for " + blockInfo); } DFSClient.LOG.info("Could not obtain " + block.getBlock() + " from any node: " + ie + errMsg + ". Will get new block locations from namenode and retry..."); try { // Introducing a random factor to the wait time before another retry. // The wait time is dependent on # of failures and a random factor. // At the first time of getting a BlockMissingException, the wait time // is a random number between 0..3000 ms. If the first retry // still fails, we will wait 3000 ms grace period before the 2nd retry. // Also at the second retry, the waiting window is expanded to 6000 ms // alleviating the request rate from the server. Similarly the 3rd retry // will wait 6000ms grace period before retry and the waiting window is // expanded to 9000ms. 
final int timeWindow = dfsClient.getConf().getTimeWindow(); double waitTime = timeWindow * failures + // grace period for the last round of attempt timeWindow * (failures + 1) * DFSUtil.getRandom().nextDouble(); // expanding time window for each failure DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1) + " IOException, will wait for " + waitTime + " msec."); Thread.sleep((long) waitTime); } catch (InterruptedException iex) { } deadNodes.clear(); // 2nd option is to remove only nodes[blockId] openInfo(); block = getBlockAt(block.getStartOffset()); failures++; continue; } } }
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
/** * Get the best node from which to stream the data. * /* www. j a v a2 s . c o m*/ * @param block * LocatedBlock, containing nodes in priority order. * @param ignoredNodes * Do not choose nodes in this array (may be null) * @return The DNAddrPair of the best node. * @throws IOException */ private DNAddrPair getBestNodeDNAddrPair(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes) throws IOException { DatanodeInfo[] nodes = block.getLocations(); StorageType[] storageTypes = block.getStorageTypes(); DatanodeInfo chosenNode = null; StorageType storageType = null; if (nodes != null) { for (int i = 0; i < nodes.length; i++) { if (!deadNodes.containsKey(nodes[i]) && (ignoredNodes == null || !ignoredNodes.contains(nodes[i]))) { chosenNode = nodes[i]; // Storage types are ordered to correspond with nodes, so use the same // index to get storage type. if (storageTypes != null && i < storageTypes.length) { storageType = storageTypes[i]; } break; } } } if (chosenNode == null) { throw new IOException("No live nodes contain block " + block.getBlock() + " after checking nodes = " + Arrays.toString(nodes) + ", ignoredNodes = " + ignoredNodes); } final String dnAddr = chosenNode.getXferAddr(dfsClient.getConf().getConnectToDnViaHostname()); if (DFSClient.LOG.isDebugEnabled()) { DFSClient.LOG.debug("Connecting to datanode " + dnAddr); } InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr); return new DNAddrPair(chosenNode, targetAddr, storageType); }
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
private int pread(long position, byte[] buffer, int offset, int length) throws IOException { // sanity checks dfsClient.checkOpen();//from w w w . j a v a 2s .c om if (closed.get()) { throw new IOException("Stream closed"); } failures = 0; long filelen = getFileLength(); if ((position < 0) || (position >= filelen)) { return -1; } int realLen = length; if ((position + length) > filelen) { realLen = (int) (filelen - position); } // determine the block and byte range within the block // corresponding to position and realLen List<LocatedBlock> blockRange = getBlockRange(position, realLen); int remaining = realLen; Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap = new HashMap<ExtendedBlock, Set<DatanodeInfo>>(); for (LocatedBlock blk : blockRange) { long targetStart = position - blk.getStartOffset(); long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart); try { if (dfsClient.isHedgedReadsEnabled()) { hedgedFetchBlockByteRange(blk, targetStart, targetStart + bytesToRead - 1, buffer, offset, corruptedBlockMap); } else { fetchBlockByteRange(blk, targetStart, targetStart + bytesToRead - 1, buffer, offset, corruptedBlockMap); } } finally { // Check and report if any block replicas are corrupted. // BlockMissingException may be caught if all block replicas are // corrupted. reportCheckSumFailure(corruptedBlockMap, blk.getLocations().length); } remaining -= bytesToRead; position += bytesToRead; offset += bytesToRead; } assert remaining == 0 : "Wrong number of bytes read."; if (dfsClient.stats != null) { dfsClient.stats.incrementBytesRead(realLen); } return realLen; }
From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java
License:Apache License
/**
 * Resolves the local path of the single block of a single-replica file via its
 * datanode and symlinks {@code fileInDir} to it.
 *
 * <p>Asserts that {@code locatedBlocks} has exactly one entry, that the first
 * block of {@code blockLocations} has exactly one location, and that the
 * block's local path exists.
 *
 * @param fileInDir      path at which the symlink is created
 * @param blockLocations block locations from which block 0 is taken
 * @param locatedBlocks  list expected to contain exactly one block
 * @throws IOException as declared; presumably from the datanode call or the symlink
 */
private void doSymlinkPathInDir(File fileInDir, LocatedBlocks blockLocations, List<LocatedBlock> locatedBlocks)
        throws IOException {
    assertEquals(1, locatedBlocks.size());

    final LocatedBlock onlyBlock = blockLocations.get(0);
    assertEquals(1, onlyBlock.getLocations().length);
    final DatanodeInfo onlyDatanode = onlyBlock.getLocations()[0];

    // Ask the datanode where the block lives on its local disk.
    final ClientDatanodeProtocol datanodeProxy = HadoopFileLocationPrototypeTest
            .createClientDatanodeProtocolProxy(onlyDatanode, hadoopFileSystem.getConf(), 1000);
    final BlockLocalPathInfo localPathInfo =
            datanodeProxy.getBlockLocalPathInfo(onlyBlock.getBlock(), onlyBlock.getBlockToken());

    final String blockPath = localPathInfo.getBlockPath();
    assertTrue(new File(blockPath).exists());
    FileUtil.symLink(blockPath, fileInDir.getAbsolutePath());
}