Example usage for org.apache.hadoop.util DataChecksum writeValue

Introduction

This page shows example usage of org.apache.hadoop.util DataChecksum writeValue.

Prototype

public int writeValue(byte[] buf, int offset, boolean reset) throws IOException 

Document

Writes the current checksum to a buffer.
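
A minimal sketch of the call pattern, assuming the same Hadoop version as the example below (the int-based DataChecksum.newDataChecksum(int, int) factory and the CHECKSUM_CRC32 constant):

import org.apache.hadoop.util.DataChecksum;

public class WriteValueSketch {
    public static void main(String[] args) throws Exception {
        // One CRC32 checksum per 512-byte chunk of input.
        DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512);

        byte[] data = "hello, checksum".getBytes("UTF-8");
        chksm.update(data, 0, data.length);

        // writeValue stores the 4-byte CRC value at the given offset in buf;
        // reset == true clears the internal state, ready for the next chunk.
        byte[] crc = new byte[chksm.getChecksumSize()];
        int written = chksm.writeValue(crc, 0, true);
        System.out.println("Wrote " + written + " checksum bytes");
    }
}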

Usage

From source file: com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java

License: Apache License

/**
 * Calculates the HDFS-style checksum for a local file in the same way that
 * HDFS does it, in parallel, on all of the blocks of a file in HDFS.
 *
 * @param strPath     path to the local file
 * @param bytesPerCRC number of data bytes covered by each CRC
 * @param lBlockSize  HDFS block size in bytes
 * @return the HDFS-style MD5-of-MD5-of-CRC32 checksum of the file
 * @throws IOException if the path is a directory or an I/O error occurs
 */
public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC,
        long lBlockSize) throws IOException {
    long lFileSize = 0;
    int iBlockCount = 0;
    DataOutputBuffer md5outDataBuffer = new DataOutputBuffer();
    DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512);
    InputStream in = null;
    MD5MD5CRC32FileChecksum returnChecksum = null;
    long crc_per_block = lBlockSize / bytesPerCRC;

    java.io.File file = new java.io.File(strPath);

    // FileStatus f_stats = srcFs.getFileStatus( srcPath );
    lFileSize = file.length();

    iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize);

    // System.out.println( "Debug > getLen == " + f_stats.getLen() +
    // " bytes" );
    // System.out.println( "Debug > iBlockCount == " + iBlockCount );

    if (file.isDirectory()) {
        throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! ");
    }

    try {
        in = new FileInputStream(file);
        long lTotalBytesRead = 0;

        for (int x = 0; x < iBlockCount; x++) {

            ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream();

            byte[] crc = new byte[4];
            byte[] buf = new byte[512];

            try {

                int bytesRead = 0;

                while ((bytesRead = in.read(buf)) > 0) {

                    lTotalBytesRead += bytesRead;

                    chksm.reset();
                    chksm.update(buf, 0, bytesRead);
                    chksm.writeValue(crc, 0, true);
                    ar_CRC_Bytes.write(crc);

                    if (lTotalBytesRead >= (x + 1) * lBlockSize) {

                        break;
                    }

                } // while

                DataInputStream inputStream = new DataInputStream(
                        new ByteArrayInputStream(ar_CRC_Bytes.toByteArray()));

                // this actually computes one ---- run on the server
                // (DataXceiver) side
                final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream);
                md5_dataxceiver.write(md5outDataBuffer);

            } catch (IOException e) {

                e.printStackTrace();

            } catch (Exception e) {

                e.printStackTrace();

            }

        } // for

        // this is in 0.19.0 style with the extra padding bug
        final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData());
        returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5);

    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {

        e.printStackTrace();

    } finally {
        if (in != null) {
            in.close();
        }
    } // try

    return returnChecksum;

}
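
A hypothetical call site for the method above (the no-argument ExternalHDFSChecksumGenerator constructor and the file path are assumptions; 512 bytes per CRC and a 64 MB block size mirror common HDFS defaults):

ExternalHDFSChecksumGenerator generator = new ExternalHDFSChecksumGenerator(); // assumed constructor
MD5MD5CRC32FileChecksum checksum = generator.getLocalFilesystemHDFSStyleChecksum(
        "/tmp/data.bin", 512, 64L * 1024 * 1024); // hypothetical local file
System.out.println(checksum);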