Example usage for org.apache.hadoop.util DataChecksum update

List of usage examples for org.apache.hadoop.util DataChecksum update

Introduction

In this page you can find the example usage for org.apache.hadoop.util DataChecksum update.

Prototype

@Override
    public void update(byte[] b, int off, int len) 

Source Link

Usage

From source file:com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java

License:Apache License

/**
 *
 * This is the function that calculates the hdfs-style checksum for a local file in the same way that
 * hdfs does it in a parallel fashion on all of the blocks in hdsf.
 *
 * @param strPath//ww  w  . j a v a2s. co  m
 * @param bytesPerCRC
 * @param lBlockSize
 * @return
 * @throws IOException
 */
public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC,
        long lBlockSize) throws IOException {
    long lFileSize = 0;
    int iBlockCount = 0;
    DataOutputBuffer md5outDataBuffer = new DataOutputBuffer();
    DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512);
    InputStream in = null;
    MD5MD5CRC32FileChecksum returnChecksum = null;
    long crc_per_block = lBlockSize / bytesPerCRC;

    java.io.File file = new java.io.File(strPath);

    // FileStatus f_stats = srcFs.getFileStatus( srcPath );
    lFileSize = file.length();

    iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize);

    // System.out.println( "Debug > getLen == " + f_stats.getLen() +
    // " bytes" );
    // System.out.println( "Debug > iBlockCount == " + iBlockCount );

    if (file.isDirectory()) {
        throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! ");
    }

    try {
        in = new FileInputStream(file);
        long lTotalBytesRead = 0;

        for (int x = 0; x < iBlockCount; x++) {

            ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream();

            byte crc[] = new byte[4];
            byte buf[] = new byte[512];

            try {

                int bytesRead = 0;

                while ((bytesRead = in.read(buf)) > 0) {

                    lTotalBytesRead += bytesRead;

                    chksm.reset();
                    chksm.update(buf, 0, bytesRead);
                    chksm.writeValue(crc, 0, true);
                    ar_CRC_Bytes.write(crc);

                    if (lTotalBytesRead >= (x + 1) * lBlockSize) {

                        break;
                    }

                } // while

                DataInputStream inputStream = new DataInputStream(
                        new ByteArrayInputStream(ar_CRC_Bytes.toByteArray()));

                // this actually computes one ---- run on the server
                // (DataXceiver) side
                final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream);
                md5_dataxceiver.write(md5outDataBuffer);

            } catch (IOException e) {

                e.printStackTrace();

            } catch (Exception e) {

                e.printStackTrace();

            }

        } // for

        // this is in 0.19.0 style with the extra padding bug
        final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData());
        returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5);

    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {

        e.printStackTrace();

    } finally {
        in.close();
    } // try

    return returnChecksum;

}