Example usage for org.apache.hadoop.hdfs.protocol LocatedBlock getLocations

List of usage examples for org.apache.hadoop.hdfs.protocol LocatedBlock getLocations

Introduction

On this page you can find example usage of org.apache.hadoop.hdfs.protocol LocatedBlock getLocations.

Prototype

public DatanodeInfo[] getLocations() 

Source Link

Document

Returns the locations associated with this block.

Usage

From source file:backup.namenode.NameNodeBackupBlockCheckProcessor.java

License:Apache License

private void addExtendedBlocksFromNameNode(BackupReportWriter writer,
        ExternalExtendedBlockSort<Addresses> nameNodeBlocks, DFSClient client, FileStatus fs,
        Set<Path> pathSetToIgnore) throws IOException {
    // Skip any path the caller asked us to ignore.
    Path qualifiedPath = fileSystem.makeQualified(fs.getPath());
    if (shouldIgnore(pathSetToIgnore, qualifiedPath)) {
        return;
    }

    // Ask the namenode for every block of the file, then record each block
    // together with the datanode addresses hosting it and report progress.
    String src = qualifiedPath.toUri().getPath();
    LocatedBlocks locatedBlocks = client.getLocatedBlocks(src, 0L, fs.getLen());
    for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
        DatanodeInfo[] locations = locatedBlock.getLocations();
        ExtendedBlock extendedBlock = BackupUtil.fromHadoop(locatedBlock.getBlock());
        nameNodeBlocks.add(extendedBlock, new Addresses(locations));
        writer.statusExtendedBlocksFromNameNode(src, extendedBlock, locations);
    }
}

From source file:com.bigstep.datalake.JsonUtil.java

License:Apache License

/**
 * Convert a {@link LocatedBlock} to a JSON-friendly map, or return
 * {@code null} when the block itself is {@code null}.
 */
private static Map<String, Object> toJsonMap(final LocatedBlock locatedblock) throws IOException {
    if (locatedblock == null) {
        return null;
    }

    // TreeMap keeps the serialized keys in a stable, sorted order.
    final Map<String, Object> json = new TreeMap<String, Object>();
    json.put("blockToken", toJsonMap(locatedblock.getBlockToken()));
    json.put("isCorrupt", locatedblock.isCorrupt());
    json.put("startOffset", locatedblock.getStartOffset());
    json.put("block", toJsonMap(locatedblock.getBlock()));
    json.put("locations", toJsonArray(locatedblock.getLocations()));
    json.put("cachedLocations", toJsonArray(locatedblock.getCachedLocations()));
    return json;
}

From source file:com.mellanox.r4h.DFSClient.java

License:Apache License

/**
 * Get block location information about a list of {@link HdfsBlockLocation}.
 * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to
 * get {@link BlockStorageLocation}s for blocks returned by
 * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)} .
 * /* w w w . j  a  v  a 2 s .  c o  m*/
 * This is done by making a round of RPCs to the associated datanodes, asking
 * the volume of each block replica. The returned array of {@link BlockStorageLocation} expose this information as a {@link VolumeId}.
 * 
 * @param blockLocations
 *            target blocks on which to query volume location information
 * @return volumeBlockLocations original block array augmented with additional
 *         volume location information for each replica.
 */
public BlockStorageLocation[] getBlockStorageLocations(List<BlockLocation> blockLocations)
        throws IOException, UnsupportedOperationException, InvalidBlockTokenException {
    if (!getConf().isHdfsBlocksMetadataEnabled()) {
        throw new UnsupportedOperationException("Datanode-side support for "
                + "getVolumeBlockLocations() must also be enabled in the client " + "configuration.");
    }
    // Downcast blockLocations and fetch out required LocatedBlock(s)
    List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
    for (BlockLocation loc : blockLocations) {
        if (!(loc instanceof HdfsBlockLocation)) {
            throw new ClassCastException(
                    "DFSClient#getVolumeBlockLocations " + "expected to be passed HdfsBlockLocations");
        }
        HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc;
        blocks.add(hdfsLoc.getLocatedBlock());
    }

    // Re-group the LocatedBlocks to be grouped by datanodes, with the values
    // a list of the LocatedBlocks on the datanode.
    Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks = new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
    for (LocatedBlock b : blocks) {
        for (DatanodeInfo info : b.getLocations()) {
            if (!datanodeBlocks.containsKey(info)) {
                datanodeBlocks.put(info, new ArrayList<LocatedBlock>());
            }
            List<LocatedBlock> l = datanodeBlocks.get(info);
            l.add(b);
        }
    }

    // Make RPCs to the datanodes to get volume locations for its replicas
    TraceScope scope = Trace.startSpan("getBlockStorageLocations", traceSampler);
    Map<DatanodeInfo, HdfsBlocksMetadata> metadatas;
    try {
        metadatas = BlockStorageLocationUtilBridge.queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks,
                getConf().getFileBlockStorageLocationsNumThreads(),
                getConf().getFileBlockStorageLocationsTimeoutMs(), getConf().getConnectToDnViaHostname());
        if (LOG.isTraceEnabled()) {
            LOG.trace("metadata returned: " + Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
        }
    } finally {
        scope.close();
    }

    // Regroup the returned VolumeId metadata to again be grouped by
    // LocatedBlock rather than by datanode
    Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtilBridge
            .associateVolumeIdsWithBlocks(blocks, metadatas);

    // Combine original BlockLocations with new VolumeId information
    BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtilBridge
            .convertToVolumeBlockLocations(blocks, blockVolumeIds);

    return volumeBlockLocations;
}

From source file:com.mellanox.r4h.DFSClient.java

License:Apache License

/**
 * Get the checksum of the whole file or of a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * The checksum is computed by asking one datanode per block for that block's
 * MD5 (OP_BLOCK_CHECKSUM) and then MD5-ing the concatenation of the
 * per-block digests.
 *
 * @param src
 *            The file path
 * @param length
 *            the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    // index of the last block already retried after an access-token error;
    // each block is retried at most once.
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    // For snapshot paths, cap the range at the snapshot's recorded length.
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        // Truncate the last (possibly partial) block to the range end.
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum; all blocks must agree on it
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block; only meaningful for multi-block files
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5 and fold it into the file-level digest buffer
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                // Stale access token: refetch locations (fresh tokens) and
                // retry this block once.
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                                + " for block " + block + " from datanode " + datanodes[j]
                                + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                // This datanode failed; log and fall through to the next one.
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }

        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Refreshes the cached block list from the namenode and returns the length
 * of the last block when it is still being written (0 if nothing has been
 * written yet, -1 if the length cannot be determined).
 */
private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
    // Pull a fresh view of the file's blocks from the namenode.
    final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0);
    if (DFSClient.LOG.isDebugEnabled()) {
        DFSClient.LOG.debug("newInfo = " + newInfo);
    }
    if (newInfo == null) {
        throw new IOException("Cannot open filename " + src);
    }

    // Any previously cached list must agree with the fresh one, block for
    // block, over the entries both lists share.
    if (locatedBlocks != null) {
        Iterator<LocatedBlock> cachedBlocks = locatedBlocks.getLocatedBlocks().iterator();
        Iterator<LocatedBlock> freshBlocks = newInfo.getLocatedBlocks().iterator();
        while (cachedBlocks.hasNext() && freshBlocks.hasNext()) {
            boolean sameBlock = cachedBlocks.next().getBlock().equals(freshBlocks.next().getBlock());
            if (!sameBlock) {
                throw new IOException("Blocklist for " + src + " has changed!");
            }
        }
    }
    locatedBlocks = newInfo;

    long lastBlockBeingWrittenLength = 0;
    if (!locatedBlocks.isLastBlockComplete()) {
        final LocatedBlock last = locatedBlocks.getLastLocatedBlock();
        if (last != null) {
            if (last.getLocations().length == 0) {
                // No locations yet: a zero-size block means nothing has been
                // written to a datanode; otherwise the length is unknown.
                return last.getBlockSize() == 0 ? 0 : -1;
            }
            final long len = readBlockLength(last);
            last.getBlock().setNumBytes(len);
            lastBlockBeingWrittenLength = len;
        }
    }

    fileEncryptionInfo = locatedBlocks.getFileEncryptionInfo();

    return lastBlockBeingWrittenLength;
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/** Read the block length from one of the datanodes hosting a replica. */
private long readBlockLength(LocatedBlock locatedblock) throws IOException {
    assert locatedblock != null : "LocatedBlock cannot be null";
    int replicaNotFoundCount = locatedblock.getLocations().length;

    for (DatanodeInfo datanode : locatedblock.getLocations()) {
        ClientDatanodeProtocol proxy = null;
        try {
            proxy = DFSUtil.createClientDatanodeProtocolProxy(datanode, dfsClient.getConfiguration(),
                    dfsClient.getConf().getSocketTimeout(), dfsClient.getConf().getConnectToDnViaHostname(),
                    locatedblock);

            final long visibleLength = proxy.getReplicaVisibleLength(locatedblock.getBlock());
            if (visibleLength >= 0) {
                return visibleLength;
            }
        } catch (IOException ioe) {
            // A datanode that does not know the replica at all is counted
            // separately; any other failure is merely logged and we move on.
            if (ioe instanceof RemoteException
                    && (((RemoteException) ioe).unwrapRemoteException() instanceof ReplicaNotFoundException)) {
                // special case : replica might not be on the DN, treat as 0 length
                replicaNotFoundCount--;
            }

            if (DFSClient.LOG.isDebugEnabled()) {
                DFSClient.LOG.debug("Failed to getReplicaVisibleLength from datanode " + datanode
                        + " for block " + locatedblock.getBlock(), ioe);
            }
        } finally {
            if (proxy != null) {
                RPC.stopProxy(proxy);
            }
        }
    }

    // Namenode told us about these locations, but none know about the replica
    // means that we hit the race between pipeline creation start and end.
    // we require all 3 because some other exception could have happened
    // on a DN that has it. we want to report that error
    if (replicaNotFoundCount == 0) {
        return 0;
    }

    throw new IOException("Cannot obtain block length for " + locatedblock);
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Chooses the best datanode for the given block, retrying with refreshed
 * block locations (after a randomized backoff) when no node is usable.
 *
 * @param block the block to read; may be re-fetched on retry
 * @param ignoredNodes nodes that must not be chosen (may be null)
 * @return the chosen datanode and its transfer address
 * @throws BlockMissingException once the retry budget is exhausted
 */
private DNAddrPair chooseDataNode(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes)
        throws IOException {
    while (true) {
        try {
            return getBestNodeDNAddrPair(block, ignoredNodes);
        } catch (IOException ie) {
            String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), deadNodes, ignoredNodes);
            String blockInfo = block.getBlock() + " file=" + src;
            if (failures >= dfsClient.getMaxBlockAcquireFailures()) {
                String description = "Could not obtain block: " + blockInfo;
                DFSClient.LOG.warn(description + errMsg + ". Throwing a BlockMissingException");
                throw new BlockMissingException(src, description, block.getStartOffset());
            }

            DatanodeInfo[] nodes = block.getLocations();
            if (nodes == null || nodes.length == 0) {
                DFSClient.LOG.info("No node available for " + blockInfo);
            }
            DFSClient.LOG.info("Could not obtain " + block.getBlock() + " from any node: " + ie + errMsg
                    + ". Will get new block locations from namenode and retry...");
            try {
                // Introducing a random factor to the wait time before another retry.
                // The wait time is dependent on # of failures and a random factor.
                // At the first time of getting a BlockMissingException, the wait time
                // is a random number between 0..3000 ms. If the first retry
                // still fails, we will wait 3000 ms grace period before the 2nd retry.
                // Also at the second retry, the waiting window is expanded to 6000 ms
                // alleviating the request rate from the server. Similarly the 3rd retry
                // will wait 6000ms grace period before retry and the waiting window is
                // expanded to 9000ms.
                final int timeWindow = dfsClient.getConf().getTimeWindow();
                double waitTime = timeWindow * failures + // grace period for the last round of attempt
                        timeWindow * (failures + 1) * DFSUtil.getRandom().nextDouble(); // expanding time window for each failure
                DFSClient.LOG.warn("DFS chooseDataNode: got # " + (failures + 1)
                        + " IOException, will wait for " + waitTime + " msec.");
                Thread.sleep((long) waitTime);
            } catch (InterruptedException iex) {
                // Fix: the interrupt was previously swallowed. Restore the
                // thread's interrupted status so callers can observe the
                // cancellation request.
                Thread.currentThread().interrupt();
            }
            deadNodes.clear(); // 2nd option is to remove only nodes[blockId]
            openInfo();
            block = getBlockAt(block.getStartOffset());
            failures++;
            continue;
        }
    }
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Get the best node from which to stream the data.
 * /*  www. j a  v  a2 s . c o  m*/
 * @param block
 *            LocatedBlock, containing nodes in priority order.
 * @param ignoredNodes
 *            Do not choose nodes in this array (may be null)
 * @return The DNAddrPair of the best node.
 * @throws IOException
 */
private DNAddrPair getBestNodeDNAddrPair(LocatedBlock block, Collection<DatanodeInfo> ignoredNodes)
        throws IOException {
    DatanodeInfo[] nodes = block.getLocations();
    StorageType[] storageTypes = block.getStorageTypes();
    DatanodeInfo chosenNode = null;
    StorageType storageType = null;
    if (nodes != null) {
        for (int i = 0; i < nodes.length; i++) {
            if (!deadNodes.containsKey(nodes[i])
                    && (ignoredNodes == null || !ignoredNodes.contains(nodes[i]))) {
                chosenNode = nodes[i];
                // Storage types are ordered to correspond with nodes, so use the same
                // index to get storage type.
                if (storageTypes != null && i < storageTypes.length) {
                    storageType = storageTypes[i];
                }
                break;
            }
        }
    }
    if (chosenNode == null) {
        throw new IOException("No live nodes contain block " + block.getBlock() + " after checking nodes = "
                + Arrays.toString(nodes) + ", ignoredNodes = " + ignoredNodes);
    }
    final String dnAddr = chosenNode.getXferAddr(dfsClient.getConf().getConnectToDnViaHostname());
    if (DFSClient.LOG.isDebugEnabled()) {
        DFSClient.LOG.debug("Connecting to datanode " + dnAddr);
    }
    InetSocketAddress targetAddr = NetUtils.createSocketAddr(dnAddr);
    return new DNAddrPair(chosenNode, targetAddr, storageType);
}

From source file:com.mellanox.r4h.DFSInputStream.java

License:Apache License

/**
 * Positioned read: fills {@code buffer} starting at {@code offset} with up
 * to {@code length} bytes read from file position {@code position}.
 * Returns the number of bytes read, or -1 when position is outside the file.
 */
private int pread(long position, byte[] buffer, int offset, int length) throws IOException {
    // Sanity checks: the client must be open and this stream not closed.
    dfsClient.checkOpen();
    if (closed.get()) {
        throw new IOException("Stream closed");
    }
    failures = 0;
    final long filelen = getFileLength();
    if (position < 0 || position >= filelen) {
        return -1;
    }
    // Clamp the request to the bytes the file actually holds.
    int realLen = (position + length) > filelen ? (int) (filelen - position) : length;

    // Walk every block overlapping [position, position + realLen).
    List<LocatedBlock> blockRange = getBlockRange(position, realLen);
    int remaining = realLen;
    Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap = new HashMap<ExtendedBlock, Set<DatanodeInfo>>();
    for (LocatedBlock blk : blockRange) {
        long targetStart = position - blk.getStartOffset();
        long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart);
        try {
            if (dfsClient.isHedgedReadsEnabled()) {
                hedgedFetchBlockByteRange(blk, targetStart, targetStart + bytesToRead - 1, buffer, offset,
                        corruptedBlockMap);
            } else {
                fetchBlockByteRange(blk, targetStart, targetStart + bytesToRead - 1, buffer, offset,
                        corruptedBlockMap);
            }
        } finally {
            // Report corrupt replicas even when the fetch itself threw
            // (e.g. BlockMissingException when all replicas are corrupt).
            reportCheckSumFailure(corruptedBlockMap, blk.getLocations().length);
        }

        remaining -= bytesToRead;
        position += bytesToRead;
        offset += bytesToRead;
    }
    assert remaining == 0 : "Wrong number of bytes read.";
    if (dfsClient.stats != null) {
        dfsClient.stats.incrementBytesRead(realLen);
    }
    return realLen;
}

From source file:com.splunk.shuttl.prototype.symlink.BucketBlockSymlinkPrototypeTest.java

License:Apache License

/**
 * Symlinks the single local block file backing the given located blocks
 * into {@code fileInDir}. Expects exactly one block with one replica.
 */
private void doSymlinkPathInDir(File fileInDir, LocatedBlocks blockLocations, List<LocatedBlock> locatedBlocks)
        throws IOException {
    // Exactly one block with exactly one replica is expected.
    assertEquals(1, locatedBlocks.size());
    LocatedBlock locatedBlock = blockLocations.get(0);
    assertEquals(1, locatedBlock.getLocations().length);

    // Ask the replica's datanode where the block file lives on local disk.
    DatanodeInfo datanodeInfo = locatedBlock.getLocations()[0];
    ClientDatanodeProtocol datanodeProxy = HadoopFileLocationPrototypeTest
            .createClientDatanodeProtocolProxy(datanodeInfo, hadoopFileSystem.getConf(), 1000);

    BlockLocalPathInfo pathInfo = datanodeProxy.getBlockLocalPathInfo(locatedBlock.getBlock(),
            locatedBlock.getBlockToken());
    String absolutePathToBlock = pathInfo.getBlockPath();
    assertTrue(new File(absolutePathToBlock).exists());

    // Symlink the physical block file into the target directory.
    FileUtil.symLink(absolutePathToBlock, fileInDir.getAbsolutePath());
}