List of usage examples for org.apache.hadoop.hdfs.protocol LocatedBlocks getLocatedBlocks
public List<LocatedBlock> getLocatedBlocks()
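getLocatedBlocks() returns the per-block list of LocatedBlock entries (block ID plus the datanodes holding a replica) for the file range previously requested from the namenode. As a minimal, self-contained sketch of the pattern the examples below share: the namenode URI hdfs://localhost:9000 and the path /tmp/example.txt are hypothetical placeholders, and DFSClient.getLocatedBlocks(src, start, length) is the same call the collected examples use.

import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;

public class ListBlocksExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical namenode URI and file path; adjust for your cluster.
    Configuration conf = new Configuration();
    DFSClient client = new DFSClient(URI.create("hdfs://localhost:9000"), conf);
    try {
      String src = "/tmp/example.txt";
      long fileLength = client.getFileInfo(src).getLen();
      // Ask the namenode for the block list, then walk it with getLocatedBlocks().
      LocatedBlocks blocks = client.getLocatedBlocks(src, 0, fileLength);
      for (LocatedBlock block : blocks.getLocatedBlocks()) {
        System.out.println(block.getBlock() + " -> " + Arrays.toString(block.getLocations()));
      }
    } finally {
      client.close();
    }
  }
}

Each example that follows applies the same iteration to a different task: backup reporting, JSON serialization, file checksumming, block caching inside DFSInputStream, symlink prototyping, and block finding.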
From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java
License:Apache License
private void addExtendedBlocksFromNameNode(BackupReportWriter writer,
    ExternalExtendedBlockSort<Addresses> nameNodeBlocks, DFSClient client, FileStatus fs,
    Set<Path> pathSetToIgnore) throws IOException {
  Path qualifiedPath = fileSystem.makeQualified(fs.getPath());
  if (shouldIgnore(pathSetToIgnore, qualifiedPath)) {
    return;
  }
  String src = qualifiedPath.toUri().getPath();
  long start = 0;
  long length = fs.getLen();
  LocatedBlocks locatedBlocks = client.getLocatedBlocks(src, start, length);
  for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
    DatanodeInfo[] locations = locatedBlock.getLocations();
    ExtendedBlock extendedBlock = BackupUtil.fromHadoop(locatedBlock.getBlock());
    Addresses addresses = new Addresses(locations);
    nameNodeBlocks.add(extendedBlock, addresses);
    writer.statusExtendedBlocksFromNameNode(src, extendedBlock, locations);
  }
}
From source file:com.bigstep.datalake.JsonUtil.java
License:Apache License
/** Convert LocatedBlocks to a Json string. */
public static String toJsonString(final LocatedBlocks locatedblocks) throws IOException {
  if (locatedblocks == null) {
    return null;
  }

  final Map<String, Object> m = new TreeMap<String, Object>();
  m.put("fileLength", locatedblocks.getFileLength());
  m.put("isUnderConstruction", locatedblocks.isUnderConstruction());
  m.put("locatedBlocks", toJsonArray(locatedblocks.getLocatedBlocks()));
  m.put("lastLocatedBlock", toJsonMap(locatedblocks.getLastLocatedBlock()));
  m.put("isLastBlockComplete", locatedblocks.isLastBlockComplete());
  return toJsonString(LocatedBlocks.class, m);
}
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *          The file path
 * @param length
 *          the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
  checkOpen();
  Preconditions.checkArgument(length >= 0);
  // get block locations for the file range
  LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
  if (null == blockLocations) {
    throw new FileNotFoundException("File does not exist: " + src);
  }
  List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
  final DataOutputBuffer md5out = new DataOutputBuffer();
  int bytesPerCRC = -1;
  DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
  long crcPerBlock = 0;
  boolean refetchBlocks = false;
  int lastRetriedIndex = -1;

  // get block checksum for each block
  long remaining = length;
  if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
    remaining = Math.min(length, blockLocations.getFileLength());
  }
  for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
    if (refetchBlocks) { // refetch to get fresh tokens
      blockLocations = callGetBlockLocations(namenode, src, 0, length);
      if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
      }
      locatedblocks = blockLocations.getLocatedBlocks();
      refetchBlocks = false;
    }
    LocatedBlock lb = locatedblocks.get(i);
    final ExtendedBlock block = lb.getBlock();
    if (remaining < block.getNumBytes()) {
      block.setNumBytes(remaining);
    }
    remaining -= block.getNumBytes();
    final DatanodeInfo[] datanodes = lb.getLocations();

    // try each datanode location of the block
    final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
    boolean done = false;
    for (int j = 0; !done && j < datanodes.length; j++) {
      DataOutputStream out = null;
      DataInputStream in = null;
      try {
        // connect to a datanode
        IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
        out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
        in = new DataInputStream(pair.in);

        if (LOG.isDebugEnabled()) {
          LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
        }
        // get block MD5
        new Sender(out).blockChecksum(block, lb.getBlockToken());

        final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

        String logInfo = "for block " + block + " from datanode " + datanodes[j];
        DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

        OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

        // read byte-per-checksum
        final int bpc = checksumData.getBytesPerCrc();
        if (i == 0) { // first block
          bytesPerCRC = bpc;
        } else if (bpc != bytesPerCRC) {
          throw new IOException(
              "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
        }

        // read crc-per-block
        final long cpb = checksumData.getCrcPerBlock();
        if (locatedblocks.size() > 1 && i == 0) {
          crcPerBlock = cpb;
        }

        // read md5
        final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
        md5.write(md5out);

        // read crc-type
        final DataChecksum.Type ct;
        if (checksumData.hasCrcType()) {
          ct = PBHelper.convert(checksumData.getCrcType());
        } else {
          LOG.debug("Retrieving checksum from an earlier-version DataNode: "
              + "inferring checksum by reading first byte");
          ct = inferChecksumTypeByReading(lb, datanodes[j]);
        }

        if (i == 0) { // first block
          crcType = ct;
        } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
          // if crc types are mixed in a file
          crcType = DataChecksum.Type.MIXED;
        }

        done = true;

        if (LOG.isDebugEnabled()) {
          if (i == 0) {
            LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
          }
          LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
        }
      } catch (InvalidBlockTokenException ibte) {
        if (i > lastRetriedIndex) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                + " for block " + block + " from datanode " + datanodes[j]
                + ". Will retry the block once.");
          }
          lastRetriedIndex = i;
          done = true; // actually it's not done; but we'll retry
          i--; // repeat at i-th block
          refetchBlocks = true;
          break;
        }
      } catch (IOException ie) {
        LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
      } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
      }
    }

    if (!done) {
      throw new IOException("Fail to get block MD5 for " + block);
    }
  }

  // compute file MD5
  final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
  switch (crcType) {
  case CRC32:
    return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
  case CRC32C:
    return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
  default:
    // If there is no block allocated for the file,
    // return one with the magic entry that matches what previous
    // hdfs versions return.
    if (locatedblocks.size() == 0) {
      return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
    }
    // we should never get here since the validity was checked
    // when getCrcType() was called above.
    return null;
  }
}
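In practice this checksum path is usually reached through the public FileSystem API, as the @see tag above notes. A minimal sketch, with a hypothetical path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileChecksumExample {
  public static void main(String[] args) throws Exception {
    // DistributedFileSystem.getFileChecksum(Path) delegates to the
    // DFSClient.getFileChecksum(src, length) logic shown above.
    FileSystem fs = FileSystem.get(new Configuration());
    FileChecksum checksum = fs.getFileChecksum(new Path("/tmp/example.txt")); // hypothetical path
    if (checksum != null) { // may be null if the filesystem does not support checksums
      System.out.println(checksum.getAlgorithmName() + ": " + checksum);
    }
  }
}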
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
  final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0);
  if (DFSClient.LOG.isDebugEnabled()) {
    DFSClient.LOG.debug("newInfo = " + newInfo);
  }
  if (newInfo == null) {
    throw new IOException("Cannot open filename " + src);
  }

  if (locatedBlocks != null) {
    Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator();
    Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator();
    while (oldIter.hasNext() && newIter.hasNext()) {
      if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) {
        throw new IOException("Blocklist for " + src + " has changed!");
      }
    }
  }
  locatedBlocks = newInfo;
  long lastBlockBeingWrittenLength = 0;
  if (!locatedBlocks.isLastBlockComplete()) {
    final LocatedBlock last = locatedBlocks.getLastLocatedBlock();
    if (last != null) {
      if (last.getLocations().length == 0) {
        if (last.getBlockSize() == 0) {
          // if the length is zero, then no data has been written to
          // datanode. So no need to wait for the locations.
          return 0;
        }
        return -1;
      }
      final long len = readBlockLength(last);
      last.getBlock().setNumBytes(len);
      lastBlockBeingWrittenLength = len;
    }
  }

  fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo();

  return lastBlockBeingWrittenLength;
}
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
/**
 * Get block at the specified position.
 * Fetch it from the namenode if not cached.
 *
 * @param offset
 *          block corresponding to this offset in file is returned
 * @return located block
 * @throws IOException
 */
private LocatedBlock getBlockAt(long offset) throws IOException {
  synchronized (infoLock) {
    assert (locatedBlocks != null) : "locatedBlocks is null";

    final LocatedBlock blk;

    // check offset
    if (offset < 0 || offset >= getFileLength()) {
      throw new IOException("offset < 0 || offset >= getFileLength(), offset=" + offset
          + ", locatedBlocks=" + locatedBlocks);
    } else if (offset >= locatedBlocks.getFileLength()) {
      // offset to the portion of the last block,
      // which is not known to the name-node yet;
      // getting the last block
      blk = locatedBlocks.getLastLocatedBlock();
    } else {
      // search cached blocks first
      int targetBlockIdx = locatedBlocks.findBlock(offset);
      if (targetBlockIdx < 0) { // block is not cached
        targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
        // fetch more blocks
        final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
        assert (newBlocks != null) : "Could not find target position " + offset;
        locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
      }
      blk = locatedBlocks.get(targetBlockIdx);
    }
    return blk;
  }
}
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
/** Fetch a block from namenode and cache it */
private void fetchBlockAt(long offset) throws IOException {
  synchronized (infoLock) {
    int targetBlockIdx = locatedBlocks.findBlock(offset);
    if (targetBlockIdx < 0) { // block is not cached
      targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
    }
    // fetch blocks
    final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
    if (newBlocks == null) {
      throw new IOException("Could not find target position " + offset);
    }
    locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
  }
}
From source file:com.mellanox.r4h.DFSInputStream.java
License:Apache License
/**
 * Get blocks in the specified range.
 * Includes only the complete blocks.
 * Fetch them from the namenode if not cached.
 */
private List<LocatedBlock> getFinalizedBlockRange(long offset, long length) throws IOException {
  synchronized (infoLock) {
    assert (locatedBlocks != null) : "locatedBlocks is null";
    List<LocatedBlock> blockRange = new ArrayList<LocatedBlock>();
    // search cached blocks first
    int blockIdx = locatedBlocks.findBlock(offset);
    if (blockIdx < 0) { // block is not cached
      blockIdx = LocatedBlocks.getInsertIndex(blockIdx);
    }
    long remaining = length;
    long curOff = offset;
    while (remaining > 0) {
      LocatedBlock blk = null;
      if (blockIdx < locatedBlocks.locatedBlockCount())
        blk = locatedBlocks.get(blockIdx);
      if (blk == null || curOff < blk.getStartOffset()) {
        LocatedBlocks newBlocks;
        newBlocks = dfsClient.getLocatedBlocks(src, curOff, remaining);
        locatedBlocks.insertRange(blockIdx, newBlocks.getLocatedBlocks());
        continue;
      }
      assert curOff >= blk.getStartOffset() : "Block not found";
      blockRange.add(blk);
      long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff;
      remaining -= bytesRead;
      curOff += bytesRead;
      blockIdx++;
    }
    return blockRange;
  }
}
From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java
License:Apache License
private void createSymlinkToPathInDir(Path path, File dir) throws IOException {
  File fileInDir = new File(dir, path.getName());
  DistributedFileSystem dfs = (DistributedFileSystem) hadoopFileSystem;
  ClientProtocol namenode = dfs.getClient().namenode;
  String pathOnHadoop = path.toUri().getPath();
  LocatedBlocks blockLocations = namenode.getBlockLocations(pathOnHadoop, 0, Long.MAX_VALUE);
  List<LocatedBlock> locatedBlocks = blockLocations.getLocatedBlocks();
  if (!locatedBlocks.isEmpty()) {
    doSymlinkPathInDir(fileInDir, blockLocations, locatedBlocks);
  } else {
    // Means that they don't have a block and that they are empty files. Just
    // create them.
    assertTrue(fileInDir.createNewFile());
  }
}
From source file:com.splunk.shuttl.prototype.symlink.HadoopFileLocationPrototypeTest.java
License:Apache License
/**
 * Before running the test: <br/>
 * <br/>
 * 1. run `ant hadoop-setup`<br/>
 * 2. run the following command in build-cache/hadoop: bin/hadoop fs -put
 * ../../test/resources/splunk-buckets/SPLUNK_BUCKET/
 * db_1336330530_1336330530_0 / <br/>
 * <br/>
 * Note: This will be automated soon!
 */
@Test(groups = { "prototype" })
public void printPathToABlockOnHadoop() throws IOException {
  // Connect to hdfs. Needs to be HDFS because we're casting to
  // org.apache.hadoop.hdfs.DistributedFileSystem
  URI uri = URI.create("hdfs://localhost:9000");
  fileSystem = (DistributedFileSystem) FileSystem.get(uri, new Configuration());
  namenode = fileSystem.getClient().namenode;

  // Get the path to the bucket that's been put to hadoop.
  Path bucketPath = new Path("/db_1336330530_1336330530_0");
  assertTrue(fileSystem.exists(bucketPath));

  // path to any file in the bucket. Chose .csv because it's
  // readable/verifiable.
  String filePath = "/db_1336330530_1336330530_0/bucket_info.csv";

  // Get location of the blocks for the file.
  LocatedBlocks blockLocations = namenode.getBlockLocations(filePath, 0, Long.MAX_VALUE);
  // There exists only one block because of how everything is set up.
  LocatedBlock locatedBlock = blockLocations.getLocatedBlocks().get(0);
  Block block = locatedBlock.getBlock();
  // There exists only one node.
  DatanodeInfo datanodeInfo = locatedBlock.getLocations()[0];

  // Get a proxy to the Datanode containing the block. (This took a while to
  // figure out)
  ClientDatanodeProtocol createClientDatanodeProtocolProxy = createClientDatanodeProtocolProxy(datanodeInfo,
      fileSystem.getConf(), 1000);

  // Get the local block path. Requires two settings on the server side of
  // hadoop.
  // 1. dfs.client.read.shortcircuit : 'true'
  // 2. dfs.block.local-path-access.user : '<user running the tests (ie.
  // periksson)>'
  BlockLocalPathInfo blockLocalPathInfo = createClientDatanodeProtocolProxy.getBlockLocalPathInfo(block,
      locatedBlock.getBlockToken());

  // Printing the local path to the block, so we can access it!!
  System.out.println("BLOCK PATH: " + blockLocalPathInfo.getBlockPath() + " !!!!!!!!!!!!!!!!!!");
}
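For reference, the two settings named in the comments above would normally be declared in hdfs-site.xml on the Hadoop side; a minimal sketch, where the user value is simply whatever account runs the tests (periksson in the comment above):

<!-- hdfs-site.xml: enable legacy short-circuit block path access -->
<property>
  <name>dfs.client.read.shortcircuit</name>
  <value>true</value>
</property>
<property>
  <name>dfs.block.local-path-access.user</name>
  <value>periksson</value>
</property>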
From source file:fm.last.hadoop.tools.BlockFinder.java
License:Apache License
@Override
public int run(String[] argv) throws IOException {
  StringBuilder b = new StringBuilder();
  ClientProtocol namenode = DFSClient.createNamenode(getConf());
  for (String fileName : argv) {
    FileStatus[] fileStatuses = fs.globStatus(new Path(fileName));
    for (FileStatus fileStatus : fileStatuses) {
      if (!fileStatus.isDir()) {
        out.println("FILE: " + fileStatus.getPath().toString());
        String path = fileStatus.getPath().toUri().getPath();
        LocatedBlocks blocks = namenode.getBlockLocations(path, 0, fileStatus.getLen());
        for (LocatedBlock block : blocks.getLocatedBlocks()) {
          b.setLength(0);
          b.append(block.getBlock());
          b.append(" - ");
          List<String> nodes = newArrayList();
          for (DatanodeInfo datanodeInfo : block.getLocations()) {
            nodes.add(datanodeInfo.name);
          }
          b.append(Joiner.on(", ").join(nodes));
          out.println(b.toString());
        }
      }
      out.println();
    }
  }
  return 0;
}