Example usage for org.apache.hadoop.fs MD5MD5CRC32FileChecksum MD5MD5CRC32FileChecksum

Introduction

On this page you can find example usages of the constructor org.apache.hadoop.fs.MD5MD5CRC32FileChecksum#MD5MD5CRC32FileChecksum.

Prototype

public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) 

Document

Creates an MD5-of-MD5-of-CRC32 file checksum from the number of bytes per CRC, the number of CRCs per block, and the top-level MD5 digest.
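
As a quick, hedged illustration (not taken from the sources below), the snippet constructs a checksum object directly and inspects it. The argument values are illustrative, and the printed algorithm name and the Base64 step reflect our reading of the Hadoop API:

import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.io.MD5Hash;

import java.util.Base64;

public class ChecksumDemo {
    public static void main(String[] args) {
        // 512 bytes per CRC, 128 CRCs per block, fixed MD5-of-MD5s digest (illustrative values)
        MD5Hash md5 = new MD5Hash("112233445566778899aabbccddeeff00");
        MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum(512, 128, md5);

        System.out.println(checksum.getAlgorithmName()); // e.g. MD5-of-128MD5-of-512CRC32
        System.out.println(checksum.getLength());        // 28 bytes: int + long + 16-byte MD5
        // getBytes() is the Writable serialization: bytesPerCRC, crcPerBlock, then the MD5
        System.out.println(Base64.getEncoder().encodeToString(checksum.getBytes()));
    }
}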

Usage

From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java

License: Apache License

@Test
public void testSingleFileInListDontFailOnWrongChecksum() throws Exception {
    String fileName = "000000_0";

    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .when(fileSystem).getFileChecksum(any(Path.class));

    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "False");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check whether checksum was passed correctly to attributes
    String filesJSON = runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).get(0)
            .getAttribute("files");
    Gson jsonParser = new Gson();
    ComputeHDFSChecksums.File[] files = jsonParser.fromJson(filesJSON, ComputeHDFSChecksums.File[].class);
    Assert.assertEquals("AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=",
            files[0].getComputedChecksum().getValue());

    // Check file system calls
    verifyGetFileChecksumCall(fileName);
}
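
The expected attribute value in the assertion above is the mocked checksum in serialized, Base64-encoded form. As a hedged sketch (this decoding reflects our reading of the test data, not documented processor behavior), unpacking it recovers the constructor arguments passed to the mock; the last four bytes are zero padding:

import org.apache.hadoop.io.MD5Hash;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.util.Base64;

public class DecodeChecksumDemo {
    public static void main(String[] args) throws Exception {
        byte[] raw = Base64.getDecoder().decode("AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=");
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(raw));

        System.out.println(in.readInt());   // bytesPerCRC: 0
        System.out.println(in.readLong());  // crcPerBlock: 512

        MD5Hash md5 = new MD5Hash();
        md5.readFields(in);
        System.out.println(md5);            // 112233445566778899aabbccddeeff00
    }
}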

From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java

License: Apache License

@Test
public void testSingleFileInListFailOnWrongChecksum() throws Exception {
    String fileName = "000000_0";

    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .when(fileSystem).getFileChecksum(any(Path.class));

    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    verifyGetFileChecksumCall(fileName);
}

From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java

License: Apache License

@Test
public void testSingleFileProperChecksum() throws Exception {
    String fileName = "000000_0";

    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .when(fileSystem).getFileChecksum(any(Path.class));

    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    verifyGetFileChecksumCall(fileName);
}

From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java

License: Apache License

@Test
public void testMultipleFilesFailOnSingleWrongChecksum() throws Exception {
    String fileName = "000000_0";
    String fileName2 = "000000_1";
    String fileName3 = "000000_2";

    Mockito.when(fileSystem.getFileChecksum(any(Path.class)))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02")));

    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]", fileName,
                    "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName2,
                    "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName3,
                    "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    InOrder inOrder = Mockito.inOrder(fileSystem);
    inOrder.verify(fileSystem).getFileChecksum(new Path(fileName));
    inOrder.verify(fileSystem).getFileChecksum(new Path(fileName2));
    inOrder.verifyNoMoreInteractions();
}

From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java

License: Apache License

@Test
public void testMultipleFilesWithDirectoryDefined() throws Exception {
    String fileName = "000000_0";
    String fileName2 = "000000_1";
    String fileName3 = "000000_2";
    String directory = "/dropzone";

    Mockito.when(fileSystem.getFileChecksum(any(Path.class)))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01")))
            .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02")));

    runner.setProperty(ComputeHDFSChecksums.DIRECTORY, directory);
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES,
            String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]", fileName,
                    "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName2,
                    "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AQAAAAA=", fileName3,
                    "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();

    // Check relationships
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

    // Check file system calls
    InOrder inOrder = Mockito.inOrder(fileSystem);
    inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName));
    inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName2));
    inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName3));
    inOrder.verifyNoMoreInteractions();
}

From source file: com.tripadvisor.hadoop.ExternalHDFSChecksumGenerator.java

License: Apache License

/**
 * Calculates the HDFS-style checksum for a local file, mirroring the way
 * HDFS computes it in parallel across all of the blocks of a file.
 *
 * @param strPath     path of the local file to checksum
 * @param bytesPerCRC number of bytes covered by each CRC32
 * @param lBlockSize  block size to emulate, in bytes
 * @return the MD5-of-MD5s-of-CRC32s checksum, or null if reading failed
 * @throws IOException if the path is a directory
 */
public MD5MD5CRC32FileChecksum getLocalFilesystemHDFSStyleChecksum(String strPath, int bytesPerCRC,
        long lBlockSize) throws IOException {
    long lFileSize = 0;
    int iBlockCount = 0;
    DataOutputBuffer md5outDataBuffer = new DataOutputBuffer();
    // CRC32 is computed over 512-byte chunks; the hardcoded 512 assumes
    // callers pass bytesPerCRC == 512
    DataChecksum chksm = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32, 512);
    InputStream in = null;
    MD5MD5CRC32FileChecksum returnChecksum = null;
    long crc_per_block = lBlockSize / bytesPerCRC;

    java.io.File file = new java.io.File(strPath);

    lFileSize = file.length();

    iBlockCount = (int) Math.ceil((double) lFileSize / (double) lBlockSize);

    if (file.isDirectory()) {
        throw new IOException("Cannot compute local hdfs hash, " + strPath + " is a directory! ");
    }

    try {
        in = new FileInputStream(file);
        long lTotalBytesRead = 0;

        for (int x = 0; x < iBlockCount; x++) {

            // collects the CRC32 of every 512-byte chunk in the current block
            ByteArrayOutputStream ar_CRC_Bytes = new ByteArrayOutputStream();

            byte[] crc = new byte[4];
            byte[] buf = new byte[512]; // must match the DataChecksum chunk size above

            try {
                int bytesRead;

                // CRC each chunk of the current block, stopping at the block boundary
                while ((bytesRead = in.read(buf)) > 0) {
                    lTotalBytesRead += bytesRead;

                    chksm.reset();
                    chksm.update(buf, 0, bytesRead);
                    chksm.writeValue(crc, 0, true);
                    ar_CRC_Bytes.write(crc);

                    if (lTotalBytesRead >= (x + 1) * lBlockSize) {
                        break;
                    }
                }

                DataInputStream inputStream = new DataInputStream(
                        new ByteArrayInputStream(ar_CRC_Bytes.toByteArray()));

                // MD5 of the block's concatenated CRCs, the same per-block digest
                // the datanode (DataXceiver) computes on the server side
                final MD5Hash md5_dataxceiver = MD5Hash.digest(inputStream);
                md5_dataxceiver.write(md5outDataBuffer);

            } catch (Exception e) {
                e.printStackTrace();
            }

        } // for

        // this is in 0.19.0 style with the extra padding bug
        final MD5Hash md5_of_md5 = MD5Hash.digest(md5outDataBuffer.getData());
        returnChecksum = new MD5MD5CRC32FileChecksum(bytesPerCRC, crc_per_block, md5_of_md5);

    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // guard against an NPE when the FileInputStream could not be opened
        if (in != null) {
            in.close();
        }
    }

    return returnChecksum;

}
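
A plausible way to use this helper is to compare a local file against a copy already in HDFS. The sketch below assumes a no-arg constructor for ExternalHDFSChecksumGenerator and uses illustrative paths and sizes; bytesPerCRC and the block size must match the values the HDFS file was written with (dfs.bytes-per-checksum and the file's block size), or the checksums will not agree:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;

public class CompareChecksums {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        ExternalHDFSChecksumGenerator generator = new ExternalHDFSChecksumGenerator();
        MD5MD5CRC32FileChecksum localChecksum = generator.getLocalFilesystemHDFSStyleChecksum(
                "/tmp/000000_0",       // local path (illustrative)
                512,                   // bytesPerCRC, must match dfs.bytes-per-checksum
                128L * 1024 * 1024);   // block size, must match the HDFS file's block size

        // FileChecksum.equals compares the algorithm name and the serialized bytes
        FileChecksum hdfsChecksum = fs.getFileChecksum(new Path("/dropzone/000000_0"));
        System.out.println("match = " + localChecksum.equals(hdfsChecksum));
    }
}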