List of usage examples for the org.apache.hadoop.fs.MD5MD5CRC32FileChecksum constructor
public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5)
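The three parameters mirror how HDFS builds this composite checksum: bytesPerCRC is the number of data bytes covered by each CRC32, crcPerBlock is the number of CRCs in a full block, and md5 is the MD5 of the per-block MD5s of those CRCs. A minimal construction sketch (the MD5Hash hex value is arbitrary, chosen only for illustration):

import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.io.MD5Hash;

public class ConstructionSketch {
    public static void main(String[] args) {
        // 512 data bytes per CRC32; a 128 MB block therefore holds 262144 CRCs.
        MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum(
                512, 128L * 1024 * 1024 / 512, new MD5Hash("112233445566778899aabbccddeeff00"));
        // The algorithm name encodes both layout parameters,
        // e.g. "MD5-of-262144MD5-of-512CRC32".
        System.out.println(checksum.getAlgorithmName());
    }
}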
From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java
License:Apache License
@Test
public void testSingleFileInListDontFailOnWrongChecksum() throws Exception {
    String fileName = "000000_0";
    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .when(fileSystem).getFileChecksum(any(Path.class));
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "False");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check whether checksum was passed correctly to attributes
    String filesJSON = runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).get(0)
            .getAttribute("files");
    Gson jsonParser = new Gson();
    ComputeHDFSChecksums.File[] files = jsonParser.fromJson(filesJSON, ComputeHDFSChecksums.File[].class);
    Assert.assertEquals(files[0].getComputedChecksum().getValue(),
            "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=");

    // Check file system calls
    verifyGetFileChecksumCall(fileName);
}
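The expected attribute value asserted above appears to be the Base64 encoding of the checksum's Writable serialization: an int bytesPerCRC (4 bytes), a long crcPerBlock (8 bytes), then the 16 MD5 bytes. A sketch that reproduces the exact string, assuming the processor writes the checksum into a DataOutputBuffer and encodes the raw backing array (DataOutputBuffer.getData() returns the full default 32-byte buffer rather than trimming to the 28 bytes written, which would explain the trailing zero bytes in the Base64 value):

import java.util.Base64;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MD5Hash;

public class ExpectedChecksumSketch {
    public static void main(String[] args) throws Exception {
        // Same values the test stubs into FileSystem.getFileChecksum().
        MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum(
                0, 512, new MD5Hash("112233445566778899aabbccddeeff00"));

        // write() emits int bytesPerCRC + long crcPerBlock + 16 MD5 bytes = 28 bytes.
        DataOutputBuffer out = new DataOutputBuffer();
        checksum.write(out);

        // getData() exposes the whole 32-byte backing array, padding included.
        String encoded = Base64.getEncoder().encodeToString(out.getData());
        System.out.println(encoded); // AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=
    }
}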
From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java
License:Apache License
@Test
public void testSingleFileInListFailOnWrongChecksum() throws Exception {
    String fileName = "000000_0";
    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .when(fileSystem).getFileChecksum(any(Path.class));
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    verifyGetFileChecksumCall(fileName);
}
From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java
License:Apache License
@Test
public void testSingleFileProperChecksum() throws Exception {
    String fileName = "000000_0";
    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .when(fileSystem).getFileChecksum(any(Path.class));
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    verifyGetFileChecksumCall(fileName);
}
From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java
License:Apache License
@Test
public void testMultipleFilesFailOnSingleWrongChecksum() throws Exception {
    String fileName = "000000_0";
    String fileName2 = "000000_1";
    String fileName3 = "000000_2";
    Mockito.when(fileSystem.getFileChecksum(any(Path.class)))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02")));
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]",
                    fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=",
                    fileName2, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=",
                    fileName3, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls: processing stops at the second file, whose
    // checksum does not match, so the third file is never checked.
    InOrder inOrder = Mockito.inOrder(fileSystem);
    inOrder.verify(fileSystem).getFileChecksum(new Path(fileName));
    inOrder.verify(fileSystem).getFileChecksum(new Path(fileName2));
    inOrder.verifyNoMoreInteractions();
}
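Why the run fails at the second file: the checksum stubbed for 000000_1 carries an MD5 ending in ...ff01, which serializes to a Base64 value ending in /AQAAAAA= rather than the /AAAAAAA= supplied in the FILES property. A quick sketch, under the same serialization assumption as above, makes the mismatch visible:

import java.util.Base64;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MD5Hash;

public class MismatchSketch {
    public static void main(String[] args) throws Exception {
        // Checksum stubbed for the second file (md5 ends in ...ff01).
        MD5MD5CRC32FileChecksum second = new MD5MD5CRC32FileChecksum(
                0, 512, new MD5Hash("112233445566778899aabbccddeeff01"));
        DataOutputBuffer out = new DataOutputBuffer();
        second.write(out);
        // Prints ...3e7/AQAAAAA=, which differs from the expected ...3e7/AAAAAAA=,
        // so the test expects the flow file routed to REL_FAILURE after two calls.
        System.out.println(Base64.getEncoder().encodeToString(out.getData()));
    }
}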
From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java
License:Apache License
@Test
public void testMultipleFilesWithDirectoryDefined() throws Exception {
    String fileName = "000000_0";
    String fileName2 = "000000_1";
    String fileName3 = "000000_2";
    String directory = "/dropzone";
    Mockito.when(fileSystem.getFileChecksum(any(Path.class)))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02")));
    runner.setProperty(ComputeHDFSChecksums.DIRECTORY, directory);
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]",
                    fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=",
                    fileName2, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AQAAAAA=",
                    fileName3, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    InOrder inOrder = Mockito.inOrder(fileSystem);
    inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName));
    inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName2));
    inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName3));
    inOrder.verifyNoMoreInteractions();
}
From source file:com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java
License:Apache License
/**
 * Calculates the HDFS-style checksum for a local file, producing the same
 * result that HDFS computes in parallel across all of a file's blocks.
 *
 * @param strPath     path to the local file
 * @param bytesPerCRC number of data bytes covered by each CRC32
 * @param lBlockSize  HDFS block size in bytes
 * @return the HDFS-style checksum, or null if an error occurred
 * @throws IOException if the path refers to a directory
 */
public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC,
        long lBlockSize) throws IOException {
    long lFileSize = 0;
    int iBlockCount = 0;
    DataOutputBuffer md5outDataBuffer = new DataOutputBuffer();
    // Note: the CRC chunk size and read buffer below are hardcoded to 512 bytes,
    // so this method effectively assumes bytesPerCRC == 512.
    DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512);
    InputStream in = null;
    MD5MD5CRC32FileChecksum returnChecksum = null;
    long crc_per_block = lBlockSize / bytesPerCRC;

    java.io.File file = new java.io.File(strPath);
    // FileStatus f_stats = srcFs.getFileStatus( srcPath );
    lFileSize = file.length();
    iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize);

    // System.out.println( "Debug > getLen == " + f_stats.getLen() + " bytes" );
    // System.out.println( "Debug > iBlockCount == " + iBlockCount );

    if (file.isDirectory()) {
        throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! ");
    }

    try {
        in = new FileInputStream(file);
        long lTotalBytesRead = 0;

        for (int x = 0; x < iBlockCount; x++) {
            ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream();
            byte crc[] = new byte[4];
            byte buf[] = new byte[512];

            try {
                int bytesRead = 0;
                while ((bytesRead = in.read(buf)) > 0) {
                    lTotalBytesRead += bytesRead;
                    chksm.reset();
                    chksm.update(buf, 0, bytesRead);
                    chksm.writeValue(crc, 0, true);
                    ar_CRC_Bytes.write(crc);
                    if (lTotalBytesRead >= (x + 1) * lBlockSize) {
                        break;
                    }
                } // while

                DataInputStream inputStream = new DataInputStream(
                        new ByteArrayInputStream(ar_CRC_Bytes.toByteArray()));

                // this actually computes one block's MD5 of CRCs ---- run on the
                // server (DataXceiver) side in real HDFS
                final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream);
                md5_dataxceiver.write(md5outDataBuffer);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        } // for

        // this is in 0.19.0 style with the extra padding bug
        final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData());
        returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (in != null) { // guard against NPE when the stream was never opened
            in.close();
        }
    } // try

    return returnChecksum;
}
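A sketch of how this method might be used to verify that a local copy of a file matches its HDFS counterpart. The paths are hypothetical, and it assumes ExternalHDFSChecksumGenerator can be instantiated directly; bytesPerCRC and block size must match the values the cluster used when writing the file (io.bytes.per.checksum and dfs.blocksize):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;

public class ChecksumCompareSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical paths: a local copy and its HDFS counterpart.
        String localPath = "/tmp/data/part-00000";
        Path hdfsPath = new Path("/user/etl/part-00000");

        // Assumes a no-arg constructor; 512 bytes per CRC, 128 MB blocks.
        ExternalHDFSChecksumGenerator generator = new ExternalHDFSChecksumGenerator();
        MD5MD5CRC32FileChecksum localChecksum =
                generator.getLocalFilesystemHDFSStyleChecksum(localPath, 512, 128L * 1024 * 1024);

        FileSystem fs = FileSystem.get(new Configuration());
        FileChecksum remoteChecksum = fs.getFileChecksum(hdfsPath);

        // FileChecksum.equals compares algorithm name and checksum bytes.
        System.out.println("Match: " + localChecksum.equals(remoteChecksum));
    }
}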