com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java Source code

Java tutorial

Introduction

Here is the source code for com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksumsTest.java

Source

package com.thinkbiganalytics.nifi.v2.hdfs;

/*-
 * #%L
 * thinkbig-nifi-hadoop-processors
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.google.gson.Gson;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MD5Hash;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.util.MockProcessContext;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.InOrder;
import org.mockito.Mockito;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import static org.mockito.Matchers.any;

public class ComputeHDFSChecksumsTest {

    /**
     * Mock file system
     */
    private final FileSystem fileSystem = Mockito.mock(FileSystem.class);

    /**
     * Test runner
     */
    private final TestRunner runner = TestRunners.newTestRunner(new TestableComputeHDFSChecksums());

    private final String fileEntry = "{\n" + "\"name\": \"%s\",\n" + "\"size\": 131665,\n" + "\"checksum\": {\n"
            + "\"length\": 28,\n" + "\"value\": \"%s\",\n" + "\"algorithm\": \"MD5-of-0MD5-of-512CRC32C\"\n" + "}\n"
            + "}";

    /**
     * Initialize instance variables
     */
    @Before
    public void setUp() throws Exception {
        // Setup mock file system
        Mockito.when(fileSystem.delete(any(Path.class), Mockito.eq(true))).thenReturn(true);

        // Setup test runner
        runner.setValidateExpressionUsage(false);
    }

    /**
     * Verify required properties.
     */
    @Test
    public void testValidators() {
        String files = ComputeHDFSChecksums.FILES.getName();

        // Test with no properties
        Collection<ValidationResult> results = validate(runner);
        Assert.assertEquals(1, results.size());
        results.forEach((ValidationResult result) -> Assert.assertEquals(
                String.format("'%s' is invalid because %s is required", files, files), result.toString()));

        // Test with required properties present
        runner.setProperty(ComputeHDFSChecksums.FILES, "[]");
        results = validate(runner);
        Assert.assertEquals(0, results.size());

        // Test with additional property DIRECTORY set
        runner.setProperty(ComputeHDFSChecksums.DIRECTORY, "/dropzone");
        results = validate(runner);
        Assert.assertEquals(0, results.size());
    }

    @Test
    public void testFilesListAttributeNotJSON() {

        runner.setProperty(ComputeHDFSChecksums.FILES, "a");
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testFileListAttributeNotArray() {
        runner.setProperty(ComputeHDFSChecksums.FILES, "{}");
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testFileListAttributeEmtpy() {
        runner.setProperty(ComputeHDFSChecksums.FILES, "");
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testFileListAttributeEmptyArray() {
        runner.setProperty(ComputeHDFSChecksums.FILES, "[]");
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testSingleFileInListDontFailOnWrongChecksum() throws Exception {
        String fileName = "000000_0";

        Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
                .when(fileSystem).getFileChecksum(any(Path.class));

        runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "False");
        runner.setProperty(ComputeHDFSChecksums.FILES,
                String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check whether checksum was passed correctly to attributes
        String filesJSON = runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).get(0)
                .getAttribute("files");
        Gson jsonParser = new Gson();
        ComputeHDFSChecksums.File[] files = jsonParser.fromJson(filesJSON, ComputeHDFSChecksums.File[].class);
        Assert.assertEquals(files[0].getComputedChecksum().getValue(),
                "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=");

        // Check file system calls
        verifyGetFileChecksumCall(fileName);
    }

    @Test
    public void testSingleFileInListFailOnWrongChecksum() throws Exception {
        String fileName = "000000_0";

        Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
                .when(fileSystem).getFileChecksum(any(Path.class));

        runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
        runner.setProperty(ComputeHDFSChecksums.FILES,
                String.format("[" + fileEntry + "]", fileName, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        verifyGetFileChecksumCall(fileName);
    }

    @Test
    public void testSingleFileProperChecksum() throws Exception {
        String fileName = "000000_0";

        Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
                .when(fileSystem).getFileChecksum(any(Path.class));

        runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
        runner.setProperty(ComputeHDFSChecksums.FILES,
                String.format("[" + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA="));
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        verifyGetFileChecksumCall(fileName);
    }

    @Test
    public void testMultipleFilesFailOnSingleWrongChecksum() throws Exception {
        String fileName = "000000_0";
        String fileName2 = "000000_1";
        String fileName3 = "000000_2";

        Mockito.when(fileSystem.getFileChecksum(any(Path.class)))
                .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
                .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01")))
                .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02")));

        runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
        runner.setProperty(ComputeHDFSChecksums.FILES,
                String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]", fileName,
                        "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName2,
                        "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName3,
                        "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA="));
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verify(fileSystem).getFileChecksum(new Path(fileName));
        inOrder.verify(fileSystem).getFileChecksum(new Path(fileName2));
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testMultipleFilesWithDirectoryDefined() throws Exception {
        String fileName = "000000_0";
        String fileName2 = "000000_1";
        String fileName3 = "000000_2";
        String directory = "/dropzone";

        Mockito.when(fileSystem.getFileChecksum(any(Path.class)))
                .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00")))
                .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff01")))
                .thenReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff02")));

        runner.setProperty(ComputeHDFSChecksums.DIRECTORY, directory);
        runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
        runner.setProperty(ComputeHDFSChecksums.FILES,
                String.format("[" + fileEntry + "," + fileEntry + "," + fileEntry + "]", fileName,
                        "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=", fileName2,
                        "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AQAAAAA=", fileName3,
                        "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AgAAAAA="));
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName));
        inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName2));
        inOrder.verify(fileSystem).getFileChecksum(new Path(directory, fileName3));
        inOrder.verifyNoMoreInteractions();
    }

    @Test
    public void testFileNotFoundException() throws Exception {
        String fileName = "000000_0";

        Mockito.doThrow(new FileNotFoundException()).when(fileSystem).getFileChecksum(any(Path.class));

        runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
        runner.setProperty(ComputeHDFSChecksums.FILES,
                String.format("[" + fileEntry + "]", fileName, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA="));
        runner.enqueue(new byte[0]);
        runner.run();

        // Check relationships
        Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
        Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());

        // Check file system calls
        verifyGetFileChecksumCall(fileName);
    }

    /**
     * Enqueues a {@code FlowFile} and validates its properties.
     *
     * @param runner the test runner
     * @return the validation results
     */
    @Nonnull
    private Collection<ValidationResult> validate(@Nonnull final TestRunner runner) {
        runner.enqueue(new byte[0]);
        return ((MockProcessContext) runner.getProcessContext()).validate();
    }

    private void verifyGetFileChecksumCall(String fileName) throws Exception {
        InOrder inOrder = Mockito.inOrder(fileSystem);
        inOrder.verify(fileSystem).getFileChecksum(new Path(fileName));
        inOrder.verifyNoMoreInteractions();
    }

    /**
     * A mock {@code ComputeHDFSChecksums} for testing.
     */
    private class TestableComputeHDFSChecksums extends ComputeHDFSChecksums {

        @Nullable
        @Override
        protected FileSystem getFileSystem(@Nonnull ProcessContext context) {
            return fileSystem;
        }

        @Override
        HdfsResources resetHDFSResources(String configResources, String dir, ProcessContext context)
                throws IOException {
            return null;
        }
    }

}