org.kitesdk.cli.commands.TestTarImportCommand.java Source code

Java tutorial

Introduction

Here is the source code for TestTarImportCommand.java, a JUnit test class in the package org.kitesdk.cli.commands.

Source

/*
 * Copyright 2014 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kitesdk.cli.commands;

import com.beust.jcommander.internal.Lists;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.*;
import org.kitesdk.cli.TestUtil;
import org.slf4j.Logger;

import java.io.File;
import java.io.IOException;
import java.util.Random;

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

public class TestTarImportCommand {

    // Per-test collaborators; both are (re)assigned in setup().
    private Logger console;
    private TarImportCommand command;

    // URI of the target dataset; assigned in createTestInputFiles().
    private static String datasetUri;

    // Number of random entries per archive, and the exclusive upper bounds
    // passed to Random.nextInt() for entry-name and entry-content lengths.
    private static final int NUM_TEST_FILES = 10;
    private static final int MAX_FILENAME_LENGTH = 50;
    private static final int MAX_FILECONTENT_LENGTH = 5000;

    // Dataset name and the directories used for datasets and generated tarballs.
    private static final String TEST_DATASET_NAME = "tarimport";
    private static final String TEST_DATASET_DIR = "target/datasets";
    private static final String TEST_DATA_DIR = "target/testdata";

    // One archive (plus the root prefix of its entries) per compression flavour.
    private static final String TAR_TEST_FILE = TEST_DATA_DIR + "/test.tar";
    private static final String TAR_TEST_ROOT_PREFIX = "testnone";
    private static final String TAR_TEST_GZIP_FILE = TEST_DATA_DIR + "/test.tar.gz";
    private static final String TAR_TEST_GZIP_ROOT_PREFIX = "testgzip";
    private static final String TAR_TEST_BZIP2_FILE = TEST_DATA_DIR + "/test.tar.bz2";
    private static final String TAR_TEST_BZIP2_ROOT_PREFIX = "testbzip2";

    // Archive holding the single oversized entry used by testWarnOnLargeEntry().
    private static final String TAR_TEST_LARGE_ENTRY_FILE = TEST_DATA_DIR + "/testlarge.tar";

    /**
     * Generates the tar fixtures used by the tests in this class.
     *
     * <p>Four archives are written below {@code TEST_DATA_DIR}: an uncompressed,
     * a gzip-compressed and a bzip2-compressed tarball that each contain one root
     * directory entry plus {@code NUM_TEST_FILES} entries with random names and
     * content (the same names and content in all three), and one uncompressed
     * tarball with a single 10000-character entry named {@code largeEntry}.
     * Entry lengths are drawn from a {@link Random} seeded with 1; the names and
     * contents themselves come from {@code RandomStringUtils}.
     *
     * <p>Also removes leftovers from earlier runs and initialises {@code datasetUri}.
     *
     * @throws IOException if a fixture cannot be written or finalised
     */
    @BeforeClass
    public static void createTestInputFiles() throws IOException {
        TestTarImportCommand.cleanup();

        Path testData = new Path(TEST_DATA_DIR);
        FileSystem testFS = testData.getFileSystem(new Configuration());

        datasetUri = "dataset:file:" + System.getProperty("user.dir") + "/" + TEST_DATASET_DIR + "/"
                + TEST_DATASET_NAME;

        TarArchiveOutputStream tosNoCompression = null;
        TarArchiveOutputStream tosGzipCompression = null;
        TarArchiveOutputStream tosBzip2Compression = null;
        TarArchiveOutputStream tosLargeEntry = null;
        try {
            // No compression
            tosNoCompression = new TarArchiveOutputStream(testFS.create(new Path(TAR_TEST_FILE), true));
            writeToTarFile(tosNoCompression, TAR_TEST_ROOT_PREFIX + "/", null);

            // Gzip compression
            tosGzipCompression = new TarArchiveOutputStream(
                    new GzipCompressorOutputStream(testFS.create(new Path(TAR_TEST_GZIP_FILE), true)));
            writeToTarFile(tosGzipCompression, TAR_TEST_GZIP_ROOT_PREFIX + "/", null);

            // BZip2 compression
            tosBzip2Compression = new TarArchiveOutputStream(
                    new BZip2CompressorOutputStream(testFS.create(new Path(TAR_TEST_BZIP2_FILE), true)));
            writeToTarFile(tosBzip2Compression, TAR_TEST_BZIP2_ROOT_PREFIX + "/", null);

            // "Large" entry file (10000 bytes)
            tosLargeEntry = new TarArchiveOutputStream(testFS.create(new Path(TAR_TEST_LARGE_ENTRY_FILE), true));
            String largeEntry = RandomStringUtils.randomAscii(10000);
            writeToTarFile(tosLargeEntry, "largeEntry", largeEntry);

            // Generate test files with random names and content
            Random random = new Random(1);
            for (int i = 0; i < NUM_TEST_FILES; ++i) {
                // nextInt() may return 0. An empty name would make the entry path equal
                // to the root directory entry written above ("<prefix>/"), so use at
                // least one character.
                int fNameLength = Math.max(1, random.nextInt(MAX_FILENAME_LENGTH));
                int fContentLength = random.nextInt(MAX_FILECONTENT_LENGTH);
                String fName = RandomStringUtils.randomAlphanumeric(fNameLength);
                String fContent = RandomStringUtils.randomAscii(fContentLength);

                // Write the same file to every tarball
                writeToTarFile(tosNoCompression, TAR_TEST_ROOT_PREFIX + "/" + fName, fContent);
                writeToTarFile(tosGzipCompression, TAR_TEST_GZIP_ROOT_PREFIX + "/" + fName, fContent);
                writeToTarFile(tosBzip2Compression, TAR_TEST_BZIP2_ROOT_PREFIX + "/" + fName, fContent);

                System.out.println("Wrote " + fName + " [" + fContentLength + "]");
            }

            // Close explicitly on the success path: closing finishes the archive, and
            // IOUtils.closeStream() in the finally block ignores close failures, which
            // would otherwise leave a truncated fixture behind without any error.
            tosNoCompression.close();
            tosGzipCompression.close();
            tosBzip2Compression.close();
            tosLargeEntry.close();
        } finally {
            // Best-effort cleanup for the failure path; a second close on an already
            // closed stream is harmless here.
            IOUtils.closeStream(tosNoCompression);
            IOUtils.closeStream(tosGzipCompression);
            IOUtils.closeStream(tosBzip2Compression);
            IOUtils.closeStream(tosLargeEntry);
        }
    }

    /**
     * Removes the generated test data and dataset directories once all tests
     * in this class have run.
     *
     * @throws IOException if a directory cannot be deleted
     */
    @AfterClass
    public static void cleanupTests() throws IOException {
        cleanup();
    }

    /**
     * Deletes the test data directory and then the test dataset directory.
     *
     * @throws IOException if a directory cannot be deleted
     */
    private static void cleanup() throws IOException {
        final String[] directories = { TEST_DATA_DIR, TEST_DATASET_DIR };
        for (String directory : directories) {
            FileUtils.deleteDirectory(new File(directory));
        }
    }

    /**
     * Appends a single entry to the given tar stream.
     *
     * @param tos     open tar stream to append the entry to
     * @param name    name of the entry inside the archive
     * @param content payload of the entry, encoded as UTF-8; when {@code null} the
     *                entry is written without payload and without an explicit size
     * @throws IOException if the entry cannot be written
     */
    private static void writeToTarFile(TarArchiveOutputStream tos, String name, String content) throws IOException {
        // Encode once with an explicit charset, and take every length from the byte
        // array: the size recorded in the header and the number of bytes written must
        // both be byte counts, which content.length() (a char count) does not guarantee.
        byte[] payload = (null == content) ? null : content.getBytes("UTF-8");

        TarArchiveEntry tarArchiveEntry = new TarArchiveEntry(name);
        if (null != payload) {
            tarArchiveEntry.setSize(payload.length);
        }
        tarArchiveEntry.setModTime(System.currentTimeMillis());
        tos.putArchiveEntry(tarArchiveEntry);
        if (null != payload) {
            tos.write(payload, 0, payload.length);
            tos.flush();
        }
        tos.closeArchiveEntry();
    }

    /**
     * Imports the uncompressed tarball after the dataset has been deleted and
     * expects the command to report that it creates a new dataset.
     */
    @Test
    public void testTarNoCompressionImportCreateDataset() throws Exception {
        // Delete the dataset that setup() asked to create.
        removeData();

        command.targets = Lists.newArrayList(TAR_TEST_FILE, datasetUri);
        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.NONE);
        verify(console).info("Creating new dataset: {}", datasetUri);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Imports the uncompressed tarball with the compression type set to "none".
     */
    @Test
    public void testTarNoCompressionImportCommand() throws IOException {
        command.compressionType = "none";
        command.targets = Lists.newArrayList(TAR_TEST_FILE, datasetUri);

        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.NONE);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Imports the uncompressed tarball without setting a compression type and
     * expects the command to report NONE.
     */
    @Test
    public void testTarNoCompressionImportCommandAutoDetect() throws IOException {
        command.targets = Lists.newArrayList(TAR_TEST_FILE, datasetUri);

        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.NONE);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Imports the gzip-compressed tarball with the compression type set to "gzip".
     */
    @Test
    public void testTarGzipImportCommand() throws IOException {
        command.compressionType = "gzip";
        command.targets = Lists.newArrayList(TAR_TEST_GZIP_FILE, datasetUri);

        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.GZIP);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Imports the gzip-compressed tarball without setting a compression type and
     * expects the command to report GZIP.
     */
    @Test
    public void testTarGzipImportCommandAutoDetect() throws IOException {
        command.targets = Lists.newArrayList(TAR_TEST_GZIP_FILE, datasetUri);

        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.GZIP);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Imports the bzip2-compressed tarball with the compression type set to "bzip2".
     */
    @Test
    public void testTarBzip2ImportCommand() throws IOException {
        command.compressionType = "bzip2";
        command.targets = Lists.newArrayList(TAR_TEST_BZIP2_FILE, datasetUri);

        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.BZIP2);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Imports the bzip2-compressed tarball without setting a compression type and
     * expects the command to report BZIP2.
     */
    @Test
    public void testTarBzip2ImportCommandAutoDetect() throws IOException {
        command.targets = Lists.newArrayList(TAR_TEST_BZIP2_FILE, datasetUri);

        int exitCode = command.run();

        assertEquals(0, exitCode);
        verify(console).info("Using {} compression", TarImportCommand.CompressionType.BZIP2);
        verify(console).info("Added {} records to \"{}\"", NUM_TEST_FILES, TEST_DATASET_NAME);
    }

    /**
     * Sets "dfs.blocksize" to 5000, imports the archive whose only entry is
     * 10000 characters long, and expects a warning naming the entry, its size
     * and the block size.
     */
    @Test
    public void testWarnOnLargeEntry() throws IOException {
        command.getConf().setLong("dfs.blocksize", 5000);
        command.targets = Lists.newArrayList(TAR_TEST_LARGE_ENTRY_FILE, datasetUri);

        int exitCode = command.run();
        assertEquals(0, exitCode);

        String expectedWarning = "Entry \"{}\" (size {}) is larger than the "
                + "HDFS block size of {}. This may result in remote block reads";
        Object[] expectedArguments = new Object[] { "largeEntry", 10000L, 5000L };

        verify(console).info("Using {} compression", TarImportCommand.CompressionType.NONE);
        verify(console).warn(expectedWarning, expectedArguments);
        verify(console).info("Added {} records to \"{}\"", 1, TEST_DATASET_NAME);
    }

    /**
     * Runs before every test: issues a "create" for the dataset through
     * {@code TestUtil.run}, then builds a fresh {@code TarImportCommand} around
     * a mocked console logger and points it at the local test dataset directory.
     */
    @Before
    public void setup() throws Exception {
        // NOTE(review): the schema path is relative, and whatever TestUtil.run
        // returns is not checked here.
        String schemaPath = "kite-tools-parent/kite-tools/src/main/avro/tar-import.avsc";
        TestUtil.run("-v", "create", datasetUri, "-s", schemaPath);

        console = mock(Logger.class);
        command = new TarImportCommand(console);
        command.setConf(new Configuration());
        command.directory = TEST_DATASET_DIR;
        command.local = true;
    }

    /**
     * Runs after every test (and is called directly by
     * testTarNoCompressionImportCreateDataset): issues a "delete" for the test
     * dataset in the local dataset directory through {@code TestUtil.run}.
     */
    @After
    public void removeData() throws Exception {
        TestUtil.run(
                "delete", TEST_DATASET_NAME,
                "--use-local",
                "-d", TEST_DATASET_DIR);
    }

}