Example usage for org.apache.hadoop.hdfs.protocol LocatedBlocks getLocatedBlocks

List of usage examples for org.apache.hadoop.hdfs.protocol LocatedBlocks getLocatedBlocks


On this page you can find example usages of org.apache.hadoop.hdfs.protocol.LocatedBlocks.getLocatedBlocks().


public List<LocatedBlock> getLocatedBlocks() 

Source Link


Get located blocks.


From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java

License:Apache License

/**
 * Records the block locations that the name node reports for a single file.
 * <p>
 * The file path is qualified against {@code fileSystem}; if {@code shouldIgnore} accepts it the
 * method returns early. Otherwise every located block in the range {@code [0, fs.getLen())} is
 * converted with {@code BackupUtil.fromHadoop}, added to {@code nameNodeBlocks} together with
 * its datanode addresses, and reported to {@code writer}.
 * <p>
 * NOTE(review): this excerpt looks truncated - the closing braces of the {@code if}, the loop
 * and the method itself are not present in the listing. Confirm against the original file.
 *
 * @param writer          receives one status call per block that is seen
 * @param nameNodeBlocks  collection that receives each (block, addresses) pair
 * @param client          client used to look up the block locations
 * @param fs              status of the file to inspect
 * @param pathSetToIgnore paths passed on to {@code shouldIgnore}
 * @throws IOException declared by the signature; presumably raised by the block lookup
 */
private void addExtendedBlocksFromNameNode(BackupReportWriter writer,
        ExternalExtendedBlockSort<Addresses> nameNodeBlocks, DFSClient client, FileStatus fs,
        Set<Path> pathSetToIgnore) throws IOException {
    Path qualifiedPath = fileSystem.makeQualified(fs.getPath());
    if (shouldIgnore(pathSetToIgnore, qualifiedPath)) {
        return;

    // Only the path component of the qualified URI is sent to the client.
    String src = qualifiedPath.toUri().getPath();
    long start = 0;
    long length = fs.getLen();

    // Ask for the locations of the whole file: offset 0 up to the reported length.
    LocatedBlocks locatedBlocks = client.getLocatedBlocks(src, start, length);
    for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
        DatanodeInfo[] locations = locatedBlock.getLocations();
        ExtendedBlock extendedBlock = BackupUtil.fromHadoop(locatedBlock.getBlock());
        Addresses addresses = new Addresses(locations);
        nameNodeBlocks.add(extendedBlock, addresses);
        writer.statusExtendedBlocksFromNameNode(src, extendedBlock, locations);

From source file:com.bigstep.datalake.JsonUtil.java

License:Apache License

/**
 * Convert LocatedBlocks to a Json string.
 * <p>
 * NOTE(review): the closing brace of the method is not present in this excerpt.
 *
 * @param locatedblocks the block list to serialize; may be {@code null}
 * @return {@code null} when {@code locatedblocks} is {@code null}; otherwise the result of
 *         {@code toJsonString(LocatedBlocks.class, m)}, where {@code m} holds the entries
 *         {@code fileLength}, {@code isUnderConstruction}, {@code locatedBlocks},
 *         {@code lastLocatedBlock} and {@code isLastBlockComplete}
 * @throws IOException declared by the signature
 */
public static String toJsonString(final LocatedBlocks locatedblocks) throws IOException {
    if (locatedblocks == null) {
        return null;
    }

    // A TreeMap keeps the entries ordered by key.
    final Map<String, Object> m = new TreeMap<String, Object>();
    m.put("fileLength", locatedblocks.getFileLength());
    m.put("isUnderConstruction", locatedblocks.isUnderConstruction());

    m.put("locatedBlocks", toJsonArray(locatedblocks.getLocatedBlocks()));
    m.put("lastLocatedBlock", toJsonMap(locatedblocks.getLastLocatedBlock()));
    m.put("isLastBlockComplete", locatedblocks.isLastBlockComplete());
    return toJsonString(LocatedBlocks.class, m);

From source file:com.mellanox.r4h.DFSClient.java

License:Apache License

/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *            The file path
 * @param length
 *            the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    // NOTE(review): this listing appears truncated - closing braces are absent and some
    // statements seem to be missing (see the notes further down). Confirm against the real
    // source file before relying on it.
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    // index of the last block for which an invalid-token retry was already attempted
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    // when src contains SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR, cap the range at the
    // file length reported with the block locations
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        // NOTE(review): the body of this branch is not present in the excerpt
        if (remaining < block.getNumBytes()) {
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                // crcPerBlock is only taken from the first block, and only for multi-block files
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;

                // read md5
                // NOTE(review): nothing visible in this excerpt writes md5 into md5out -
                // presumably a dropped line; verify against the original
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
            } catch (InvalidBlockTokenException ibte) {
                // retry a block at most once: only when it has not been retried before
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                                + " for block " + block + " from datanode " + datanodes[j]
                                + ". Will retry the block once.");
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
            } catch (IOException ie) {
                // logged only; the inner loop then moves on to the next datanode
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            // NOTE(review): the finally body (presumably cleanup of in/out) is missing
            // from the excerpt
            } finally {

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
        // NOTE(review): a switch label (presumably "default:") seems to have been dropped
        // before the following lines
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);

        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Fetches the block list for {@code src} (from offset 0) through {@code dfsClient}, stores it
 * in {@code locatedBlocks} and reports the length of a last block that is still incomplete.
 * <p>
 * When a block list is already cached, old and new lists are compared pairwise and an
 * {@link IOException} is thrown on the first mismatch. The method also refreshes
 * {@code fileEncryptionInfo} from the new block list.
 * <p>
 * NOTE(review): this excerpt looks truncated - closing braces are missing throughout, so the
 * nesting described here is read from the indentation. Confirm against the original file.
 *
 * @return 0 when the last block is complete, is absent, or has no locations and size 0;
 *         -1 when the last block has no locations but a non-zero size; otherwise the value
 *         returned by {@code readBlockLength} for the last block
 * @throws IOException if no block information is returned or the block list has changed
 */
private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
    final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0);
    if (DFSClient.LOG.isDebugEnabled()) {
        DFSClient.LOG.debug("newInfo = " + newInfo);
    }
    if (newInfo == null) {
        throw new IOException("Cannot open filename " + src);

    // Compare the cached blocks with the fresh ones for as long as both lists have entries.
    if (locatedBlocks != null) {
        Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator();
        Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator();
        while (oldIter.hasNext() && newIter.hasNext()) {
            if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) {
                throw new IOException("Blocklist for " + src + " has changed!");
    locatedBlocks = newInfo;
    long lastBlockBeingWrittenLength = 0;
    if (!locatedBlocks.isLastBlockComplete()) {
        final LocatedBlock last = locatedBlocks.getLastLocatedBlock();
        if (last != null) {
            if (last.getLocations().length == 0) {
                if (last.getBlockSize() == 0) {
                    // if the length is zero, then no data has been written to
                    // datanode. So no need to wait for the locations.
                    return 0;
                // no locations but a non-zero size: signalled to the caller with -1
                return -1;
            final long len = readBlockLength(last);
            lastBlockBeingWrittenLength = len;

    fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo();

    return lastBlockBeingWrittenLength;

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Get block at the specified position.
 * Fetch it from the namenode if not cached.
 *
 * @param offset
 *            block corresponding to this offset in file is returned
 * @return located block
 * @throws IOException
 */
private LocatedBlock getBlockAt(long offset) throws IOException {
    // NOTE(review): this excerpt looks truncated - the closing braces of the inner if, the
    // else branch, the synchronized block and the method are not present in the listing.
    // All reads and updates of locatedBlocks below happen while holding infoLock.
    synchronized (infoLock) {
        assert (locatedBlocks != null) : "locatedBlocks is null";

        final LocatedBlock blk;

        // check offset
        if (offset < 0 || offset >= getFileLength()) {
            throw new IOException("offset < 0 || offset >= getFileLength(), offset=" + offset
                    + ", locatedBlocks=" + locatedBlocks);
        } else if (offset >= locatedBlocks.getFileLength()) {
            // offset to the portion of the last block,
            // which is not known to the name-node yet;
            // getting the last block
            blk = locatedBlocks.getLastLocatedBlock();
        } else {
            // search cached blocks first
            int targetBlockIdx = locatedBlocks.findBlock(offset);
            if (targetBlockIdx < 0) { // block is not cached
                // a negative result is translated into the index to insert at
                targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
                // fetch more blocks
                final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
                assert (newBlocks != null) : "Could not find target position " + offset;
                locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
            blk = locatedBlocks.get(targetBlockIdx);
        return blk;

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Fetch a block from namenode and cache it.
 * <p>
 * Unlike a pure cache lookup, the block list is requested from {@code dfsClient} on every
 * call, whether or not {@code findBlock} located the offset in the cached list. The fetched
 * blocks are then handed to {@code locatedBlocks.insertRange}.
 * <p>
 * NOTE(review): this excerpt looks truncated - the closing braces of the null check, the
 * synchronized block and the method are not present in the listing.
 *
 * @param offset file offset whose block should be fetched
 * @throws IOException if no block information is returned for {@code offset}
 */
private void fetchBlockAt(long offset) throws IOException {
    synchronized (infoLock) {
        int targetBlockIdx = locatedBlocks.findBlock(offset);
        if (targetBlockIdx < 0) { // block is not cached
            targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
        }
        // fetch blocks
        final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
        if (newBlocks == null) {
            throw new IOException("Could not find target position " + offset);
        locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Get blocks in the specified range.
 * Includes only the complete blocks.
 * Fetch them from the namenode if not cached.
 */
private List<LocatedBlock> getFinalizedBlockRange(long offset, long length) throws IOException {
    // NOTE(review): this excerpt appears truncated. Closing braces are missing, and in the
    // visible lines nothing is ever added to blockRange, blockIdx is never advanced, and
    // there is no jump back to the loop head after fetching new blocks (so blk could still
    // be null at the assert). Presumably those statements were dropped from the listing -
    // confirm against the original file.
    synchronized (infoLock) {
        assert (locatedBlocks != null) : "locatedBlocks is null";
        List<LocatedBlock> blockRange = new ArrayList<LocatedBlock>();
        // search cached blocks first
        int blockIdx = locatedBlocks.findBlock(offset);
        if (blockIdx < 0) { // block is not cached
            blockIdx = LocatedBlocks.getInsertIndex(blockIdx);
        // remaining counts the bytes still to cover; curOff is the current file offset
        long remaining = length;
        long curOff = offset;
        while (remaining > 0) {
            LocatedBlock blk = null;
            if (blockIdx < locatedBlocks.locatedBlockCount())
                blk = locatedBlocks.get(blockIdx);
            // no cached block at this index, or the cached block starts after curOff:
            // ask dfsClient for the missing range and insert it into the cache
            if (blk == null || curOff < blk.getStartOffset()) {
                LocatedBlocks newBlocks;
                newBlocks = dfsClient.getLocatedBlocks(src, curOff, remaining);
                locatedBlocks.insertRange(blockIdx, newBlocks.getLocatedBlocks());
            assert curOff >= blk.getStartOffset() : "Block not found";
            // bytes from curOff up to the end of this block
            long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff;
            remaining -= bytesRead;
            curOff += bytesRead;
        return blockRange;

From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java

License:Apache License

/**
 * Looks up the blocks of {@code path} on the name node and, when the file has at least one
 * block, delegates to {@code doSymlinkPathInDir} with a target file named after {@code path}
 * inside {@code dir}.
 * <p>
 * NOTE(review): this excerpt looks truncated - the body of the {@code else} branch (the
 * comment says empty files are simply created) and the closing braces are not present.
 *
 * @param path file on the Hadoop file system
 * @param dir  local directory in which the entry named {@code path.getName()} is placed
 * @throws IOException declared by the signature; presumably from the block-location lookup
 */
private void createSymlinkToPathInDir(Path path, File dir) throws IOException {
    File fileInDir = new File(dir, path.getName());

    // The cast requires hadoopFileSystem to be a DistributedFileSystem.
    DistributedFileSystem dfs = (DistributedFileSystem) hadoopFileSystem;
    ClientProtocol namenode = dfs.getClient().namenode;
    String pathOnHadoop = path.toUri().getPath();
    // Long.MAX_VALUE as length asks for every block from offset 0 onwards.
    LocatedBlocks blockLocations = namenode.getBlockLocations(pathOnHadoop, 0, Long.MAX_VALUE);
    List<LocatedBlock> locatedBlocks = blockLocations.getLocatedBlocks();
    if (!locatedBlocks.isEmpty()) {
        doSymlinkPathInDir(fileInDir, blockLocations, locatedBlocks);
    } else {
        // Means that they don't have a block and that they are empty files. Just
        // create them.

From source file:com.splunk.shuttl.prototype.symlink.HadoopFileLocationPrototypeTest.java

License:Apache License

/**
 * Before running the test: <br/>
 * <br/>
 * 1. run `ant hadoop-setup`<br/>
 * 2. run the following command in build-cache/hadoop: bin/hadoop fs -put
 * ../../test/resources/splunk-buckets/SPLUNK_BUCKET/
 * db_1336330530_1336330530_0 / <br/>
 * <br/>
 * Note: This will be automated soon!
 */
@Test(groups = { "prototype" })
public void printPathToABlockOnHadoop() throws IOException {
    // NOTE(review): this excerpt looks truncated - the getBlockLocalPathInfo call near the
    // end is cut off in the middle of its argument list and the method's closing brace is
    // missing. Confirm against the original file.

    // Connect to hdfs. Needs to be HDFS because we're casting to
    // org.apache.hadoop.hdfs.DistributedFileSystem
    URI uri = URI.create("hdfs://localhost:9000");
    fileSystem = (DistributedFileSystem) FileSystem.get(uri, new Configuration());
    namenode = fileSystem.getClient().namenode;

    // Get the path to the bucket that's been put to hadoop.
    // NOTE(review): bucketPath is not used anywhere in the visible lines.
    Path bucketPath = new Path("/db_1336330530_1336330530_0");

    // path to any file in the bucket. Chose .csv because it's
    // readable/verifiable.
    String filePath = "/db_1336330530_1336330530_0/bucket_info.csv";

    // Get location of the blocks for the file.
    LocatedBlocks blockLocations = namenode.getBlockLocations(filePath, 0, Long.MAX_VALUE);
    // There exists only one block because of how everything is set up.
    LocatedBlock locatedBlock = blockLocations.getLocatedBlocks().get(0);
    Block block = locatedBlock.getBlock();
    // There exists only one node.
    DatanodeInfo datanodeInfo = locatedBlock.getLocations()[0];

    // Get a proxy to the Datanode containing the block. (This took a while to
    // figure out)
    ClientDatanodeProtocol createClientDatanodeProtocolProxy = createClientDatanodeProtocolProxy(datanodeInfo,
            fileSystem.getConf(), 1000);

    // Get the local block path. Requires two settings on the server side of
    // hadoop.
    // 1. dfs.client.read.shortcircuit : 'true'
    // 2. dfs.block.local-path-access.user : '<user running the tests (ie.
    // periksson)>'
    BlockLocalPathInfo blockLocalPathInfo = createClientDatanodeProtocolProxy.getBlockLocalPathInfo(block,
    // Printing the local path to the block, so we can access it!!
    System.out.println("BLOCK PATH: " + blockLocalPathInfo.getBlockPath() + " !!!!!!!!!!!!!!!!!!");

From source file:fm.last.hadoop.tools.BlockFinder.java

License:Apache License

/**
 * For every argument, expands it as a glob through {@code fs} and, for each match that is not
 * a directory, prints a {@code FILE:} line and looks up the file's block locations on the
 * name node.
 * <p>
 * NOTE(review): this excerpt looks truncated - the body of the innermost loop (which
 * presumably fills {@code nodes}), any statement that prints {@code b}, and all closing
 * braces are missing from the listing. Confirm against the original file.
 *
 * @param argv file names or glob patterns to inspect
 * @return 0 in the visible code path
 * @throws IOException declared by the signature; presumably from the file-system or
 *         name-node calls
 */
public int run(String[] argv) throws IOException {
    // One builder is reused for every block line; it is reset at the top of each iteration.
    StringBuilder b = new StringBuilder();

    ClientProtocol namenode = DFSClient.createNamenode(getConf());
    for (String fileName : argv) {
        FileStatus[] fileStatuses = fs.globStatus(new Path(fileName));
        for (FileStatus fileStatus : fileStatuses) {
            if (!fileStatus.isDir()) {
                out.println("FILE: " + fileStatus.getPath().toString());

                String path = fileStatus.getPath().toUri().getPath();
                LocatedBlocks blocks = namenode.getBlockLocations(path, 0, fileStatus.getLen());

                for (LocatedBlock block : blocks.getLocatedBlocks()) {
                    b.setLength(0);
                    b.append(" - ");

                    List<String> nodes = newArrayList();
                    for (DatanodeInfo datanodeInfo : block.getLocations()) {
                    b.append(Joiner.on(", ").join(nodes));

    return 0;