Example usage for org.apache.hadoop.io MD5Hash write

Introduction

On this page you can find example usages of org.apache.hadoop.io.MD5Hash#write.

Prototype

@Override
public void write(DataOutput out) throws IOException
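MD5Hash implements Writable, so write(DataOutput) serializes the 16 raw digest bytes, and the matching readFields(DataInput) reads them back. A minimal round-trip sketch (the sample input is made up; the classes are from org.apache.hadoop.io):

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MD5Hash;

public class MD5HashWriteDemo {
    public static void main(String[] args) throws IOException {
        // compute an MD5 digest over some sample bytes
        MD5Hash original = MD5Hash.digest("hello".getBytes("UTF-8"));

        // write(DataOutput) emits the 16-byte digest
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        // read it back with readFields(DataInput)
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        MD5Hash copy = new MD5Hash();
        copy.readFields(in);

        System.out.println(original.equals(copy)); // prints: true
    }
}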

Usage

From source file: com.mellanox.r4h.DFSClient.java

License: Apache License

/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *            The file path
 * @param length
 *            the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);
    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM " + "for file " + src
                                + " for block " + block + " from datanode " + datanodes[j]
                                + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }

        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
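
The md5.write(md5out) call above is what ties the loop together: each per-block MD5 is appended to the shared DataOutputBuffer, and MD5Hash.digest over that buffer yields the MD5-of-MD5s backing the file checksum. A stripped-down sketch of just that aggregation step (the block contents here are placeholders):

import java.io.IOException;

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MD5Hash;

public class Md5OfMd5sSketch {
    public static void main(String[] args) throws IOException {
        DataOutputBuffer md5out = new DataOutputBuffer();

        // stand-ins for the per-block checksums the datanodes would return
        byte[][] blocks = { "block-0".getBytes("UTF-8"), "block-1".getBytes("UTF-8") };
        for (byte[] block : blocks) {
            MD5Hash blockMd5 = MD5Hash.digest(block);
            blockMd5.write(md5out); // append the 16 digest bytes
        }

        // the file-level digest is the MD5 of the concatenated block MD5s;
        // note the examples on this page digest getData() directly, which can
        // include unwritten padding in the backing array (the "0.19.0 padding
        // bug" the second example mentions)
        MD5Hash fileMd5 = MD5Hash.digest(md5out.getData(), 0, md5out.getLength());
        System.out.println("MD5-of-MD5s = " + fileMd5);
    }
}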

From source file: com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java

License: Apache License

/**
 * Calculates the HDFS-style checksum for a local file in the same way that
 * HDFS does it (in parallel over all of the blocks in HDFS).
 *
 * @param strPath the path of the local file
 * @param bytesPerCRC the number of bytes covered by each CRC
 * @param lBlockSize the HDFS block size to emulate
 * @return the HDFS-style checksum of the local file
 * @throws IOException
 */
public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC,
        long lBlockSize) throws IOException {
    long lFileSize = 0;
    int iBlockCount = 0;
    DataOutputBuffer md5outDataBuffer = new DataOutputBuffer();
    // use the caller-supplied bytesPerCRC (the original hardcoded 512 here,
    // which is only correct when bytesPerCRC == 512)
    DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, bytesPerCRC);
    InputStream in = null;
    MD5MD5CRC32FileChecksum returnChecksum = null;
    long crc_per_block = lBlockSize / bytesPerCRC;

    java.io.File file = new java.io.File(strPath);

    // FileStatus f_stats = srcFs.getFileStatus( srcPath );
    lFileSize = file.length();

    iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize);

    // System.out.println( "Debug > getLen == " + f_stats.getLen() +
    // " bytes" );
    // System.out.println( "Debug > iBlockCount == " + iBlockCount );

    if (file.isDirectory()) {
        throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! ");
    }

    try {
        in = new FileInputStream(file);
        long lTotalBytesRead = 0;

        for (int x = 0; x < iBlockCount; x++) {

            ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream();

            byte[] crc = new byte[4];
            byte[] buf = new byte[bytesPerCRC];

            try {

                int bytesRead = 0;

                while ((bytesRead = in.read(buf)) > 0) {

                    lTotalBytesRead += bytesRead;

                    chksm.reset();
                    chksm.update(buf, 0, bytesRead);
                    chksm.writeValue(crc, 0, true);
                    ar_CRC_Bytes.write(crc);

                    if (lTotalBytesRead >= (x + 1) * lBlockSize) {

                        break;
                    }

                } // while

                DataInputStream inputStream = new DataInputStream(
                        new ByteArrayInputStream(ar_CRC_Bytes.toByteArray()));

                // this actually computes one ---- run on the server
                // (DataXceiver) side
                final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream);
                md5_dataxceiver.write(md5outDataBuffer);

            } catch (Exception e) {
                e.printStackTrace();
            }

        } // for

        // this is in 0.19.0 style with the extra padding bug
        final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData());
        returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5);

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // guard against a failed open: in may still be null here
        if (in != null) {
            in.close();
        }
    } // try

    return returnChecksum;

}
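
One plausible way to use this helper is to sanity-check an upload: compute the local checksum, then compare it against what HDFS reports via the standard FileSystem#getFileChecksum API. The paths, block size, bytesPerCRC, and the no-arg constructor below are assumptions, not part of the original source:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;

public class ChecksumCompareSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical: assumes ExternalHDFSChecksumGenerator can be constructed directly
        ExternalHDFSChecksumGenerator gen = new ExternalHDFSChecksumGenerator();

        // hypothetical paths; bytesPerCRC and block size must match the
        // cluster's dfs.bytes-per-checksum and dfs.blocksize settings
        MD5MD5CRC32FileChecksum local = gen.getLocalFilesystemHDFSStyleChecksum(
                "/tmp/data.bin", 512, 128L * 1024 * 1024);

        // checksum of the uploaded copy, as computed by the datanodes
        FileSystem fs = FileSystem.get(new Configuration());
        FileChecksum remote = fs.getFileChecksum(new Path("/user/me/data.bin"));

        System.out.println("local  = " + local);
        System.out.println("remote = " + remote);
        System.out.println("match  = " + local.equals(remote));
    }
}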