Example usage for org.apache.hadoop.hdfs.protocol LocatedBlocks getLocatedBlocks

Introduction

On this page you can find example usage of org.apache.hadoop.hdfs.protocol.LocatedBlocks.getLocatedBlocks().

Prototype

public List<LocatedBlock> getLocatedBlocks() 

Document

Get located blocks.
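
The method returns a List<LocatedBlock>, one entry per block of the requested file range, and is usually reached through a DFSClient, as the examples below show. A minimal sketch, assuming a reachable namenode at hdfs://localhost:9000 and a hypothetical file /tmp/example.txt:

DFSClient client = new DFSClient(URI.create("hdfs://localhost:9000"), new Configuration());
try {
    // Ask the namenode for the block list covering the whole file.
    LocatedBlocks locatedBlocks = client.getLocatedBlocks("/tmp/example.txt", 0, Long.MAX_VALUE);
    for (LocatedBlock block : locatedBlocks.getLocatedBlocks()) {
        System.out.println(block.getBlock() + " hosted on " + block.getLocations().length + " datanode(s)");
    }
} finally {
    client.close();
}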

Usage

From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java

License:Apache License

private void addExtendedBlocksFromNameNode(BackupReportWriter writer,
        ExternalExtendedBlockSort<Addresses> nameNodeBlocks, DFSClient client, FileStatus fs,
        Set<Path> pathSetToIgnore) throws IOException {
    Path qualifiedPath = fileSystem.makeQualified(fs.getPath());
    if (shouldIgnore(pathSetToIgnore, qualifiedPath)) {
        return;
    }

    String src = qualifiedPath.toUri().getPath();
    long start = 0;
    long length = fs.getLen();

    LocatedBlocks locatedBlocks = client.getLocatedBlocks(src, start, length);
    for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
        DatanodeInfo[] locations = locatedBlock.getLocations();
        ExtendedBlock extendedBlock = BackupUtil.fromHadoop(locatedBlock.getBlock());
        Addresses addresses = new Addresses(locations);
        nameNodeBlocks.add(extendedBlock, addresses);
        writer.statusExtendedBlocksFromNameNode(src, extendedBlock, locations);
    }
}

From source file:com.bigstep.datalake.JsonUtil.java

License:Apache License

/** Convert LocatedBlocks to a Json string. */
public static String toJsonString(final LocatedBlocks locatedblocks) throws IOException {
    if (locatedblocks == null) {
        return null;
    }

    final Map<String, Object> m = new TreeMap<String, Object>();
    m.put("fileLength", locatedblocks.getFileLength());
    m.put("isUnderConstruction", locatedblocks.isUnderConstruction());

    m.put("locatedBlocks", toJsonArray(locatedblocks.getLocatedBlocks()));
    m.put("lastLocatedBlock", toJsonMap(locatedblocks.getLastLocatedBlock()));
    m.put("isLastBlockComplete", locatedblocks.isLastBlockComplete());
    return toJsonString(LocatedBlocks.class, m);
}
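
A hedged sketch of driving this helper end to end; the dfsClient variable and the file path are assumptions, not part of the original source:

// Fetch block locations for a hypothetical file and serialize them as JSON.
LocatedBlocks blocks = dfsClient.getLocatedBlocks("/data/events.log", 0, Long.MAX_VALUE);
String json = JsonUtil.toJsonString(blocks); // returns null when blocks is null
System.out.println(json);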

From source file:com.mellanox.r4h.DFSClient.java

License:Apache License

/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *            The file path
 * @param length
 *            the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                                + " for block " + block + " from datanode " + datanodes[j]
                                + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }

        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
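
The javadoc above points to DistributedFileSystem#getFileChecksum(Path) as the public entry point that ends up in this method. A minimal sketch through the generic FileSystem API, assuming a reachable cluster and a hypothetical path:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
// For HDFS files this is an MD5-of-MD5-of-CRC32 checksum (CRC32 or CRC32C per block).
FileChecksum checksum = fs.getFileChecksum(new Path("/tmp/example.txt"));
System.out.println(checksum.getAlgorithmName() + ": " + checksum);
fs.close();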

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
    final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0);
    if (DFSClient.LOG.isDebugEnabled()) {
        DFSClient.LOG.debug("newInfo = " + newInfo);
    }
    if (newInfo == null) {
        throw new IOException("Cannot open filename " + src);
    }

    if (locatedBlocks != null) {
        Iterator<LocatedBlock> oldIter = locatedBlocks.getLocatedBlocks().iterator();
        Iterator<LocatedBlock> newIter = newInfo.getLocatedBlocks().iterator();
        while (oldIter.hasNext() && newIter.hasNext()) {
            if (!oldIter.next().getBlock().equals(newIter.next().getBlock())) {
                throw new IOException("Blocklist for " + src + " has changed!");
            }
        }
    }
    locatedBlocks = newInfo;
    long lastBlockBeingWrittenLength = 0;
    if (!locatedBlocks.isLastBlockComplete()) {
        final LocatedBlock last = locatedBlocks.getLastLocatedBlock();
        if (last != null) {
            if (last.getLocations().length == 0) {
                if (last.getBlockSize() == 0) {
                    // if the length is zero, then no data has been written to
                    // datanode. So no need to wait for the locations.
                    return 0;
                }
                return -1;
            }
            final long len = readBlockLength(last);
            last.getBlock().setNumBytes(len);
            lastBlockBeingWrittenLength = len;
        }
    }

    fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo();

    return lastBlockBeingWrittenLength;
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Get block at the specified position.
 * Fetch it from the namenode if not cached.
 * 
 * @param offset
 *            block corresponding to this offset in file is returned
 * @return located block
 * @throws IOException
 */
private LocatedBlock getBlockAt(long offset) throws IOException {
    synchronized (infoLock) {
        assert (locatedBlocks != null) : "locatedBlocks is null";

        final LocatedBlock blk;

        // check offset
        if (offset < 0 || offset >= getFileLength()) {
            throw new IOException("offset < 0 || offset >= getFileLength(), offset=" + offset
                    + ", locatedBlocks=" + locatedBlocks);
        } else if (offset >= locatedBlocks.getFileLength()) {
            // offset to the portion of the last block,
            // which is not known to the name-node yet;
            // getting the last block
            blk = locatedBlocks.getLastLocatedBlock();
        } else {
            // search cached blocks first
            int targetBlockIdx = locatedBlocks.findBlock(offset);
            if (targetBlockIdx < 0) { // block is not cached
                targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
                // fetch more blocks
                final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
                assert (newBlocks != null) : "Could not find target position " + offset;
                locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
            }
            blk = locatedBlocks.get(targetBlockIdx);
        }
        return blk;
    }
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/** Fetch a block from namenode and cache it */
private void fetchBlockAt(long offset) throws IOException {
    synchronized (infoLock) {
        int targetBlockIdx = locatedBlocks.findBlock(offset);
        if (targetBlockIdx < 0) { // block is not cached
            targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
        }
        // fetch blocks
        final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset);
        if (newBlocks == null) {
            throw new IOException("Could not find target position " + offset);
        }
        locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
    }
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Get blocks in the specified range.
 * Includes only the complete blocks.
 * Fetch them from the namenode if not cached.
 */
private List<LocatedBlock> getFinalizedBlockRange(long offset, long length) throws IOException {
    synchronized (infoLock) {
        assert (locatedBlocks != null) : "locatedBlocks is null";
        List<LocatedBlock> blockRange = new ArrayList<LocatedBlock>();
        // search cached blocks first
        int blockIdx = locatedBlocks.findBlock(offset);
        if (blockIdx < 0) { // block is not cached
            blockIdx = LocatedBlocks.getInsertIndex(blockIdx);
        }
        long remaining = length;
        long curOff = offset;
        while (remaining > 0) {
            LocatedBlock blk = null;
            if (blockIdx < locatedBlocks.locatedBlockCount())
                blk = locatedBlocks.get(blockIdx);
            if (blk == null || curOff < blk.getStartOffset()) {
                LocatedBlocks newBlocks;
                newBlocks = dfsClient.getLocatedBlocks(src, curOff, remaining);
                locatedBlocks.insertRange(blockIdx, newBlocks.getLocatedBlocks());
                continue;
            }
            assert curOff >= blk.getStartOffset() : "Block not found";
            blockRange.add(blk);
            long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff;
            remaining -= bytesRead;
            curOff += bytesRead;
            blockIdx++;
        }
        return blockRange;
    }
}

From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java

License:Apache License

private void createSymlinkToPathInDir(Path path, File dir) throws IOException {
    File fileInDir = new File(dir, path.getName());

    DistributedFileSystem dfs = (DistributedFileSystem) hadoopFileSystem;
    ClientProtocol namenode = dfs.getClient().namenode;
    String pathOnHadoop = path.toUri().getPath();
    LocatedBlocks blockLocations = namenode.getBlockLocations(pathOnHadoop, 0, Long.MAX_VALUE);
    List<LocatedBlock> locatedBlocks = blockLocations.getLocatedBlocks();
    if (!locatedBlocks.isEmpty()) {
        doSymlinkPathInDir(fileInDir, blockLocations, locatedBlocks);
    } else {
        // The file has no blocks, which means it is empty. Just create it.
        assertTrue(fileInDir.createNewFile());
    }
}

From source file:com.splunk.shuttl.prototype.symlink.HadoopFileLocationPrototypeTest.java

License:Apache License

/**
 * Before running the test: <br/>
 * <br/>
 * 1. run `ant hadoop-setup`<br/>
 * 2. run the following command in build-cache/hadoop: bin/hadoop fs -put
 * ../../test/resources/splunk-buckets/SPLUNK_BUCKET/
 * db_1336330530_1336330530_0 / <br/>
 * <br/>
 * Note: This will be automated soon!
 */
@Test(groups = { "prototype" })
public void printPathToABlockOnHadoop() throws IOException {
    // Connect to hdfs. Needs to be HDFS because we're casting to
    // org.apache.hadoop.hdfs.DistributedFileSystem
    URI uri = URI.create("hdfs://localhost:9000");
    fileSystem = (DistributedFileSystem) FileSystem.get(uri, new Configuration());
    namenode = fileSystem.getClient().namenode;

    // Get the path to the bucket that's been put to hadoop.
    Path bucketPath = new Path("/db_1336330530_1336330530_0");
    assertTrue(fileSystem.exists(bucketPath));

    // path to any file in the bucket. Chose .csv because it's
    // readable/verifiable.
    String filePath = "/db_1336330530_1336330530_0/bucket_info.csv";

    // Get location of the blocks for the file.
    LocatedBlocks blockLocations = namenode.getBlockLocations(filePath, 0, Long.MAX_VALUE);
    // There exists only one block because of how everything is set up.
    LocatedBlock locatedBlock = blockLocations.getLocatedBlocks().get(0);
    Block block = locatedBlock.getBlock();
    // There exists only one node.
    DatanodeInfo datanodeInfo = locatedBlock.getLocations()[0];

    // Get a proxy to the Datanode containing the block. (This took a while to
    // figure out)
    ClientDatanodeProtocol createClientDatanodeProtocolProxy = createClientDatanodeProtocolProxy(datanodeInfo,
            fileSystem.getConf(), 1000);

    // Get the local block path. Requires two settings on the server side of
    // hadoop.
    // 1. dfs.client.read.shortcircuit : 'true'
    // 2. dfs.block.local-path-access.user : '<user running the tests (ie.
    // periksson)>'
    BlockLocalPathInfo blockLocalPathInfo = createClientDatanodeProtocolProxy.getBlockLocalPathInfo(block,
            locatedBlock.getBlockToken());
    // Printing the local path to the block, so we can access it!!
    System.out.println("BLOCK PATH: " + blockLocalPathInfo.getBlockPath() + " !!!!!!!!!!!!!!!!!!");
}
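
The two server-side settings named in the comments can also be expressed programmatically; a sketch of the datanode-side configuration, assuming it is applied before the datanode starts (the user name is the placeholder from the comment above):

Configuration conf = new Configuration();
// Allow clients to read block files directly from the local filesystem.
conf.setBoolean("dfs.client.read.shortcircuit", true);
// User permitted to request local block paths (placeholder from the test comment).
conf.set("dfs.block.local-path-access.user", "periksson");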

From source file:fm.last.hadoop.tools.BlockFinder.java

License:Apache License

@Override
public int run(String[] argv) throws IOException {
    StringBuilder b = new StringBuilder();

    ClientProtocol namenode = DFSClient.createNamenode(getConf());
    for (String fileName : argv) {
        FileStatus[] fileStatuses = fs.globStatus(new Path(fileName));
        for (FileStatus fileStatus : fileStatuses) {
            if (!fileStatus.isDir()) {
                out.println("FILE: " + fileStatus.getPath().toString());

                String path = fileStatus.getPath().toUri().getPath();
                LocatedBlocks blocks = namenode.getBlockLocations(path, 0, fileStatus.getLen());

                for (LocatedBlock block : blocks.getLocatedBlocks()) {
                    b.setLength(0);
                    b.append(block.getBlock());
                    b.append(" - ");

                    List<String> nodes = newArrayList();
                    for (DatanodeInfo datanodeInfo : block.getLocations()) {
                        nodes.add(datanodeInfo.name);
                    }
                    b.append(Joiner.on(", ").join(nodes));
                    out.println(b.toString());
                }

            }
            out.println();
        }
    }
    return 0;
}
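
Since run(String[]) follows the Hadoop Tool contract, the finder would normally be launched through ToolRunner; a sketch under the assumption that BlockFinder implements org.apache.hadoop.util.Tool:

public static void main(String[] args) throws Exception {
    // Parse generic Hadoop options, then pass the remaining file names to run().
    int exitCode = ToolRunner.run(new Configuration(), new BlockFinder(), args);
    System.exit(exitCode);
}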