List of usage examples for org.apache.hadoop.hdfs.protocol LocatedBlock getBlockToken
public Token<BlockTokenIdentifier> getBlockToken()
From source file:com.bigstep.datalake.JsonUtil.java
License:Apache License
/**
 * Builds the JSON map representation of a {@link LocatedBlock}.
 *
 * @param locatedblock the block to convert; may be {@code null}
 * @return a sorted map holding the block token, corruption flag, start offset,
 *         block, locations and cached locations, or {@code null} when
 *         {@code locatedblock} is {@code null}
 * @throws IOException if one of the nested conversions fails
 */
private static Map<String, Object> toJsonMap(final LocatedBlock locatedblock) throws IOException {
    if (locatedblock == null) {
        return null;
    }

    final Map<String, Object> json = new TreeMap<String, Object>();
    // Token first: it is the conversion visible here that may raise IOException.
    json.put("blockToken", toJsonMap(locatedblock.getBlockToken()));
    json.put("isCorrupt", locatedblock.isCorrupt());
    json.put("startOffset", locatedblock.getStartOffset());
    json.put("block", toJsonMap(locatedblock.getBlock()));
    json.put("locations", toJsonArray(locatedblock.getLocations()));
    json.put("cachedLocations", toJsonArray(locatedblock.getCachedLocations()));
    return json;
}
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/** * Get the checksum of the whole file of a range of the file. Note that the * range always starts from the beginning of the file. * /*w w w . java2 s . c o m*/ * @param src * The file path * @param length * the length of the range, i.e., the range is [0, length] * @return The checksum * @see DistributedFileSystem#getFileChecksum(Path) */ public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException { checkOpen(); Preconditions.checkArgument(length >= 0); // get block locations for the file range LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length); if (null == blockLocations) { throw new FileNotFoundException("File does not exist: " + src); } List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks(); final DataOutputBuffer md5out = new DataOutputBuffer(); int bytesPerCRC = -1; DataChecksum.Type crcType = DataChecksum.Type.DEFAULT; long crcPerBlock = 0; boolean refetchBlocks = false; int lastRetriedIndex = -1; // get block checksum for each block long remaining = length; if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) { remaining = Math.min(length, blockLocations.getFileLength()); } for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) { if (refetchBlocks) { // refetch to get fresh tokens blockLocations = callGetBlockLocations(namenode, src, 0, length); if (null == blockLocations) { throw new FileNotFoundException("File does not exist: " + src); } locatedblocks = blockLocations.getLocatedBlocks(); refetchBlocks = false; } LocatedBlock lb = locatedblocks.get(i); final ExtendedBlock block = lb.getBlock(); if (remaining < block.getNumBytes()) { block.setNumBytes(remaining); } remaining -= block.getNumBytes(); final DatanodeInfo[] datanodes = lb.getLocations(); // try each datanode location of the block final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout(); boolean done = false; for (int j = 0; !done && j < datanodes.length; j++) { DataOutputStream 
out = null; DataInputStream in = null; try { // connect to a datanode IOStreamPair pair = connectToDN(datanodes[j], timeout, lb); out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE)); in = new DataInputStream(pair.in); if (LOG.isDebugEnabled()) { LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block); } // get block MD5 new Sender(out).blockChecksum(block, lb.getBlockToken()); final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in)); String logInfo = "for block " + block + " from datanode " + datanodes[j]; DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo); OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse(); // read byte-per-checksum final int bpc = checksumData.getBytesPerCrc(); if (i == 0) { // first block bytesPerCRC = bpc; } else if (bpc != bytesPerCRC) { throw new IOException( "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC); } // read crc-per-block final long cpb = checksumData.getCrcPerBlock(); if (locatedblocks.size() > 1 && i == 0) { crcPerBlock = cpb; } // read md5 final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray()); md5.write(md5out); // read crc-type final DataChecksum.Type ct; if (checksumData.hasCrcType()) { ct = PBHelper.convert(checksumData.getCrcType()); } else { LOG.debug("Retrieving checksum from an earlier-version DataNode: " + "inferring checksum by reading first byte"); ct = inferChecksumTypeByReading(lb, datanodes[j]); } if (i == 0) { // first block crcType = ct; } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) { // if crc types are mixed in a file crcType = DataChecksum.Type.MIXED; } done = true; if (LOG.isDebugEnabled()) { if (i == 0) { LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock); } LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5); } } catch (InvalidBlockTokenException ibte) { if (i > 
lastRetriedIndex) { if (LOG.isDebugEnabled()) { LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src + " for block " + block + " from datanode " + datanodes[j] + ". Will retry the block once."); } lastRetriedIndex = i; done = true; // actually it's not done; but we'll retry i--; // repeat at i-th block refetchBlocks = true; break; } } catch (IOException ie) { LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie); } finally { IOUtils.closeStream(in); IOUtils.closeStream(out); } } if (!done) { throw new IOException("Fail to get block MD5 for " + block); } } // compute file MD5 final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData()); switch (crcType) { case CRC32: return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5); case CRC32C: return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5); default: // If there is no block allocated for the file, // return one with the magic entry that matches what previous // hdfs versions return. if (locatedblocks.size() == 0) { return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5); } // we should never get here since the validity was checked // when getCrcType() was called above. return null; } }
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/** * Connect to the given datanode's datantrasfer port, and return * the resulting IOStreamPair. This includes encryption wrapping, etc. *///from w ww .j a va2 s . c o m private IOStreamPair connectToDN(DatanodeInfo dn, int timeout, LocatedBlock lb) throws IOException { boolean success = false; Socket sock = null; try { sock = socketFactory.createSocket(); String dnAddr = dn.getXferAddr(getConf().getConnectToDnViaHostname()); if (LOG.isDebugEnabled()) { LOG.debug("Connecting to datanode " + dnAddr); } NetUtils.connect(sock, NetUtils.createSocketAddr(dnAddr), timeout); sock.setSoTimeout(timeout); OutputStream unbufOut = NetUtils.getOutputStream(sock); InputStream unbufIn = NetUtils.getInputStream(sock); IOStreamPair ret = saslClient.newSocketSend(sock, unbufOut, unbufIn, this, lb.getBlockToken(), dn); success = true; return ret; } finally { if (!success) { IOUtils.closeSocket(sock); } } }
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/**
 * Infers the checksum type of a replica by issuing a read-block request for
 * the first byte of that replica and inspecting the checksum information in
 * the reply. Per the original documentation this exists for compatibility with
 * older HDFS versions whose OpBlockChecksumResponseProto did not carry the
 * checksum type.
 *
 * @param lb
 *            the located block
 * @param dn
 *            the datanode to connect to
 * @return the inferred checksum type
 * @throws IOException
 *             if an error occurs
 */
private Type inferChecksumTypeByReading(LocatedBlock lb, DatanodeInfo dn) throws IOException {
    final IOStreamPair streams = connectToDN(dn, dfsClientConf.socketTimeout(), lb);
    try {
        final BufferedOutputStream buffered =
                new BufferedOutputStream(streams.out, HdfsConstants.SMALL_BUFFER_SIZE);
        final DataOutputStream request = new DataOutputStream(buffered);
        final DataInputStream response = new DataInputStream(streams.in);

        // Ask for exactly one byte starting at offset 0.
        final Sender sender = new Sender(request);
        sender.readBlock(lb.getBlock(), lb.getBlockToken(), clientName, 0, 1, true,
                CachingStrategy.newDefaultStrategy());

        final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(response));
        DataTransferProtoUtil.checkBlockOpStatus(reply,
                "trying to read " + lb.getBlock() + " from datanode " + dn);

        return PBHelper.convert(reply.getReadOpChecksumInfo().getChecksum().getType());
    } finally {
        IOUtils.cleanup(null, streams.in, streams.out);
    }
}
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
/** * Open a DataInputStream to a DataNode so that it can be read from. * We get block ID and the IDs of the destinations at startup, from the namenode. *///from ww w . ja va 2s . c o m private synchronized DatanodeInfo blockSeekTo(long target) throws IOException { if (target >= getFileLength()) { throw new IOException("Attempted to read past end of file"); } // Will be getting a new BlockReader. closeCurrentBlockReader(); // // Connect to best DataNode for desired Block, with potential offset // DatanodeInfo chosenNode = null; int refetchToken = 1; // only need to get a new access token once int refetchEncryptionKey = 1; // only need to get a new encryption key once boolean connectFailedOnce = false; while (true) { // // Compute desired block // LocatedBlock targetBlock = getBlockAt(target); // update current position this.pos = target; this.blockEnd = targetBlock.getStartOffset() + targetBlock.getBlockSize() - 1; this.currentLocatedBlock = targetBlock; assert (target == pos) : "Wrong postion " + pos + " expect " + target; long offsetIntoBlock = target - targetBlock.getStartOffset(); DNAddrPair retval = chooseDataNode(targetBlock, null); chosenNode = retval.info; InetSocketAddress targetAddr = retval.addr; StorageType storageType = retval.storageType; try { ExtendedBlock blk = targetBlock.getBlock(); Token<BlockTokenIdentifier> accessToken = targetBlock.getBlockToken(); CachingStrategy curCachingStrategy; boolean shortCircuitForbidden; synchronized (infoLock) { curCachingStrategy = cachingStrategy; shortCircuitForbidden = shortCircuitForbidden(); } blockReader = new BlockReaderFactory(dfsClient.getConf()).setInetSocketAddress(targetAddr) .setRemotePeerFactory(dfsClient).setDatanodeInfo(chosenNode).setStorageType(storageType) .setFileName(src).setBlock(blk).setBlockToken(accessToken).setStartOffset(offsetIntoBlock) .setVerifyChecksum(verifyChecksum).setClientName(dfsClient.clientName) .setLength(blk.getNumBytes() - 
offsetIntoBlock).setCachingStrategy(curCachingStrategy) .setAllowShortCircuitLocalReads(!shortCircuitForbidden) .setClientCacheContext(dfsClient.getClientContext()).setUserGroupInformation(dfsClient.ugi) .setConfiguration(dfsClient.getConfiguration()).build(); if (connectFailedOnce) { DFSClient.LOG.info("Successfully connected to " + targetAddr + " for " + blk); } return chosenNode; } catch (IOException ex) { if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) { DFSClient.LOG.info("Will fetch a new encryption key and retry, " + "encryption key was invalid when connecting to " + targetAddr + " : " + ex); // The encryption key used is invalid. refetchEncryptionKey--; dfsClient.clearDataEncryptionKey(); } else if (refetchToken > 0 && tokenRefetchNeeded(ex, targetAddr)) { refetchToken--; fetchBlockAt(target); } else { connectFailedOnce = true; DFSClient.LOG.warn("Failed to connect to " + targetAddr + " for block" + ", add to deadNodes and continue. " + ex, ex); // Put chosen node into dead list, continue addToDeadNodes(chosenNode); } } } }
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
private void actualGetFromOneDataNode(final DNAddrPair datanode, LocatedBlock block, final long start, final long end, byte[] buf, int offset, Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap) throws IOException { DFSClientFaultInjector.get().startFetchFromDatanode(); int refetchToken = 1; // only need to get a new access token once int refetchEncryptionKey = 1; // only need to get a new encryption key once while (true) { // cached block locations may have been updated by chooseDataNode() // or fetchBlockAt(). Always get the latest list of locations at the // start of the loop. CachingStrategy curCachingStrategy; boolean allowShortCircuitLocalReads; block = getBlockAt(block.getStartOffset()); synchronized (infoLock) { curCachingStrategy = cachingStrategy; allowShortCircuitLocalReads = !shortCircuitForbidden(); }/*w w w.j ava 2 s . c o m*/ DatanodeInfo chosenNode = datanode.info; InetSocketAddress targetAddr = datanode.addr; StorageType storageType = datanode.storageType; BlockReader reader = null; try { DFSClientFaultInjector.get().fetchFromDatanodeException(); Token<BlockTokenIdentifier> blockToken = block.getBlockToken(); int len = (int) (end - start + 1); reader = new BlockReaderFactory(dfsClient.getConf()).setInetSocketAddress(targetAddr) .setRemotePeerFactory(dfsClient).setDatanodeInfo(chosenNode).setStorageType(storageType) .setFileName(src).setBlock(block.getBlock()).setBlockToken(blockToken).setStartOffset(start) .setVerifyChecksum(verifyChecksum).setClientName(dfsClient.clientName).setLength(len) .setCachingStrategy(curCachingStrategy) .setAllowShortCircuitLocalReads(allowShortCircuitLocalReads) .setClientCacheContext(dfsClient.getClientContext()).setUserGroupInformation(dfsClient.ugi) .setConfiguration(dfsClient.getConfiguration()).build(); int nread = reader.readAll(buf, offset, len); updateReadStatistics(readStatistics, nread, reader); if (nread != len) { throw new IOException( "truncated return from reader.read(): " + "excpected " + len + ", got 
" + nread); } DFSClientFaultInjector.get().readFromDatanodeDelay(); return; } catch (ChecksumException e) { String msg = "fetchBlockByteRange(). Got a checksum exception for " + src + " at " + block.getBlock() + ":" + e.getPos() + " from " + chosenNode; DFSClient.LOG.warn(msg); // we want to remember what we have tried addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap); addToDeadNodes(chosenNode); throw new IOException(msg); } catch (IOException e) { if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) { DFSClient.LOG.info("Will fetch a new encryption key and retry, " + "encryption key was invalid when connecting to " + targetAddr + " : " + e); // The encryption key used is invalid. refetchEncryptionKey--; dfsClient.clearDataEncryptionKey(); continue; } else if (refetchToken > 0 && tokenRefetchNeeded(e, targetAddr)) { refetchToken--; try { fetchBlockAt(block.getStartOffset()); } catch (IOException fbae) { // ignore IOE, since we can retry it later in a loop } continue; } else { String msg = "Failed to connect to " + targetAddr + " for file " + src + " for block " + block.getBlock() + ":" + e; DFSClient.LOG.warn("Connection failure: " + msg, e); addToDeadNodes(chosenNode); throw new IOException(msg); } } finally { if (reader != null) { reader.close(); } } } }
From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java
License:Apache License
/**
 * Resolves the local file backing the first located block and creates a
 * symlink to it at the path of {@code fileInDir}.
 *
 * <p>Asserts that there is exactly one located block with exactly one
 * location, and that the block file exists locally before linking.
 *
 * @param fileInDir      the file whose absolute path becomes the link name
 * @param blockLocations the block locations; the block at index 0 is used
 * @param locatedBlocks  the list of located blocks, expected to hold one entry
 * @throws IOException if querying the datanode or creating the link fails
 */
private void doSymlinkPathInDir(File fileInDir, LocatedBlocks blockLocations, List<LocatedBlock> locatedBlocks)
        throws IOException {
    assertEquals(1, locatedBlocks.size());

    final LocatedBlock onlyBlock = blockLocations.get(0);
    assertEquals(1, onlyBlock.getLocations().length);
    final DatanodeInfo onlyNode = onlyBlock.getLocations()[0];

    final ClientDatanodeProtocol datanodeProxy = HadoopFileLocationPrototypeTest
            .createClientDatanodeProtocolProxy(onlyNode, hadoopFileSystem.getConf(), 1000);
    final BlockLocalPathInfo localInfo =
            datanodeProxy.getBlockLocalPathInfo(onlyBlock.getBlock(), onlyBlock.getBlockToken());

    final String blockPath = localInfo.getBlockPath();
    assertTrue(new File(blockPath).exists());

    FileUtil.symLink(blockPath, fileInDir.getAbsolutePath());
}
From source file:com.splunk.shuttl.prototype.symlink.HadoopFileLocationPrototypeTest.java
License:Apache License
/** * Before running the test: <br/>//from w w w . j a v a 2s . com * <br/> * 1. run `ant hadoop-setup`<br/> * 2. run the following command in build-cache/hadoop: bin/hadoop fs -put * ../../test/resources/splunk-buckets/SPLUNK_BUCKET/ * db_1336330530_1336330530_0 / <br/> * <br/> * Note: This will be automated soon! */ @Test(groups = { "prototype" }) public void printPathToABlockOnHadoop() throws IOException { // Connect to hdfs. Needs to be HDFS because we're casting to // org.apache.hadoop.hdfs.DistributedFileSystem URI uri = URI.create("hdfs://localhost:9000"); fileSystem = (DistributedFileSystem) FileSystem.get(uri, new Configuration()); namenode = fileSystem.getClient().namenode; // Get the path to the bucket that's been put to hadoop. Path bucketPath = new Path("/db_1336330530_1336330530_0"); assertTrue(fileSystem.exists(bucketPath)); // path to any file in the bucket. Chose .csv because it's // readable/verifiable. String filePath = "/db_1336330530_1336330530_0/bucket_info.csv"; // Get location of the blocks for the file. LocatedBlocks blockLocations = namenode.getBlockLocations(filePath, 0, Long.MAX_VALUE); // There exists only one block because of how everything is set up. LocatedBlock locatedBlock = blockLocations.getLocatedBlocks().get(0); Block block = locatedBlock.getBlock(); // There exists only one node. DatanodeInfo datanodeInfo = locatedBlock.getLocations()[0]; // Get a proxy to the Datanode containing the block. (This took a while to // figure out) ClientDatanodeProtocol createClientDatanodeProtocolProxy = createClientDatanodeProtocolProxy(datanodeInfo, fileSystem.getConf(), 1000); // Get the local block path. Requires two settings on the server side of // hadoop. // 1. dfs.client.read.shortcircuit : 'true' // 2. dfs.block.local-path-access.user : '<user running the tests (ie. 
// periksson)>' BlockLocalPathInfo blockLocalPathInfo = createClientDatanodeProtocolProxy.getBlockLocalPathInfo(block, locatedBlock.getBlockToken()); // Printing the local path to the block, so we can access it!! System.out.println("BLOCK PATH: " + blockLocalPathInfo.getBlockPath() + " !!!!!!!!!!!!!!!!!!"); }
From source file:org.openflamingo.remote.thrift.thriftfs.ThriftUtils.java
License:Apache License
public static Block toThrift(LocatedBlock block, String path, Map<DatanodeID, Integer> thriftPorts) throws java.io.IOException { if (block == null) { return new Block(); }//from w w w .j a va2 s. c o m List<DatanodeInfo> nodes = new ArrayList<DatanodeInfo>(); for (org.apache.hadoop.hdfs.protocol.DatanodeInfo n : block.getLocations()) { DatanodeInfo node = toThrift(n, thriftPorts); if (node.getThriftPort() != Constants.UNKNOWN_THRIFT_PORT) { nodes.add(node); } } org.apache.hadoop.hdfs.protocol.Block b = block.getBlock(); return new Block(b.getBlockId(), path, b.getNumBytes(), b.getGenerationStamp(), nodes, block.getStartOffset(), block.getBlockToken().encodeToUrlString()); }