List of usage examples for org.apache.hadoop.util DataChecksum writeValue
public int writeValue(byte[] buf, int offset, boolean reset) throws IOException
From source file:com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java
License:Apache License
/** * * This is the function that calculates the hdfs-style checksum for a local file in the same way that * hdfs does it in a parallel fashion on all of the blocks in hdsf. * * @param strPath//from ww w . j a v a 2 s.c o m * @param bytesPerCRC * @param lBlockSize * @return * @throws IOException */ public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC, long lBlockSize) throws IOException { long lFileSize = 0; int iBlockCount = 0; DataOutputBuffer md5outDataBuffer = new DataOutputBuffer(); DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512); InputStream in = null; MD5MD5CRC32FileChecksum returnChecksum = null; long crc_per_block = lBlockSize / bytesPerCRC; java.io.File file = new java.io.File(strPath); // FileStatus f_stats = srcFs.getFileStatus( srcPath ); lFileSize = file.length(); iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize); // System.out.println( "Debug > getLen == " + f_stats.getLen() + // " bytes" ); // System.out.println( "Debug > iBlockCount == " + iBlockCount ); if (file.isDirectory()) { throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! "); } try { in = new FileInputStream(file); long lTotalBytesRead = 0; for (int x = 0; x < iBlockCount; x++) { ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream(); byte crc[] = new byte[4]; byte buf[] = new byte[512]; try { int bytesRead = 0; while ((bytesRead = in.read(buf)) > 0) { lTotalBytesRead += bytesRead; chksm.reset(); chksm.update(buf, 0, bytesRead); chksm.writeValue(crc, 0, true); ar_CRC_Bytes.write(crc); if (lTotalBytesRead >= (x + 1) * lBlockSize) { break; } } // while DataInputStream inputStream = new DataInputStream( new ByteArrayInputStream(ar_CRC_Bytes.toByteArray())); // this actually computes one ---- run on the server // (DataXceiver) side final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream); md5_dataxceiver.write(md5outDataBuffer); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } } // for // this is in 0.19.0 style with the extra padding bug final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData()); returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } finally { in.close(); } // try return returnChecksum; }