gobblin.data.management.copy.publisher.CopyDataPublisherTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.copy.publisher.CopyDataPublisherTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.data.management.copy.publisher;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.data.management.copy.CopyConfiguration;
import gobblin.data.management.copy.CopySource;
import gobblin.data.management.copy.CopyableDataset;
import gobblin.data.management.copy.CopyableDatasetMetadata;
import gobblin.data.management.copy.CopyEntity;
import gobblin.data.management.copy.CopyableFile;
import gobblin.data.management.copy.PreserveAttributes;
import gobblin.data.management.copy.TestCopyableDataset;
import gobblin.util.PathUtils;

import java.io.IOException;
import java.util.Collection;
import java.util.List;

import lombok.extern.slf4j.Slf4j;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.io.Closer;
import com.google.common.io.Files;

/**
 * Tests for {@link CopyDataPublisher}.
 *
 * <p>Test cases covered:
 * <ul>
 *   <li>Single dataset with multiple files/work units</li>
 *   <li>Single dataset with multiple files/work units, a few of which failed</li>
 *   <li>Two datasets with multiple files</li>
 *   <li>Two datasets, one of which failed to publish</li>
 *   <li>Datasets with overlapping dataset roots</li>
 * </ul>
 */
@Slf4j
public class CopyDataPublisherTest {

    // Collects every CopyDataPublisher created by the tests so they can all be closed in cleanup().
    private static final Closer closer = Closer.create();

    // Local file system, initialized in setup() and used to create/delete the class-level temp directory.
    private FileSystem fs;
    // Root temp directory for this test class, initialized in setup(); each test works in a sub-directory of it.
    private Path testClassTempPath;

    /**
     * Publishes the work units of a single dataset and checks that its files are absent from the
     * target path before {@code publishData} is called and present afterwards.
     */
    @Test
    public void testPublishSingleDataset() throws Exception {
        final String methodName = "testPublishSingleDataset";

        final State jobState = getTestState(methodName);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");

        final CopyDataPublisher publisher = closer.register(new CopyDataPublisher(jobState));

        final TestDatasetManager dataset = new TestDatasetManager(new Path(testClassTempPath, methodName),
                jobState, "datasetTargetPath", ImmutableList.of("a/b", "a/c", "d/e"));

        // Stage the writer output, then confirm nothing has reached the target path yet.
        dataset.createDatasetFiles();
        dataset.verifyDoesntExist();

        publisher.publishData(dataset.getWorkUnitStates());

        dataset.verifyExists();
    }

    /**
     * Publishes the combined work units of two datasets that have distinct target paths and checks
     * that the files of both are absent before {@code publishData} is called and present afterwards.
     */
    @Test
    @SuppressWarnings("unchecked")
    public void testPublishMultipleDatasets() throws Exception {
        final String methodName = "testPublishMultipleDatasets";

        final State jobState = getTestState(methodName);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");

        final Path methodTempDir = new Path(testClassTempPath, methodName);
        final CopyDataPublisher publisher = closer.register(new CopyDataPublisher(jobState));

        // Both datasets use the same relative file layout under different target paths.
        final List<String> relativeFiles = ImmutableList.of("a/b", "a/c", "d/e");

        final TestDatasetManager firstDataset =
                new TestDatasetManager(methodTempDir, jobState, "dataset1TargetPath", relativeFiles);
        firstDataset.createDatasetFiles();

        final TestDatasetManager secondDataset =
                new TestDatasetManager(methodTempDir, jobState, "dataset2TargetPath", relativeFiles);
        secondDataset.createDatasetFiles();

        firstDataset.verifyDoesntExist();
        secondDataset.verifyDoesntExist();

        final Collection<? extends WorkUnitState> allWorkUnitStates =
                combine(firstDataset.getWorkUnitStates(), secondDataset.getWorkUnitStates());
        publisher.publishData(allWorkUnitStates);

        firstDataset.verifyExists();
        secondDataset.verifyExists();
    }

    /**
     * Publishes two datasets where the second dataset's target path ({@code datasetTargetPath/subDir})
     * is nested inside the first one's ({@code datasetTargetPath}), and checks that the files of both
     * are absent before {@code publishData} is called and present afterwards.
     */
    @Test
    @SuppressWarnings("unchecked")
    public void testPublishOverlappingDatasets() throws Exception {
        final String methodName = "testPublishOverlappingDatasets";

        final State jobState = getTestState(methodName);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");

        final Path methodTempDir = new Path(testClassTempPath, methodName);
        final CopyDataPublisher publisher = closer.register(new CopyDataPublisher(jobState));

        final TestDatasetManager outerDataset =
                new TestDatasetManager(methodTempDir, jobState, "datasetTargetPath", ImmutableList.of("a/b"));
        outerDataset.createDatasetFiles();

        final TestDatasetManager nestedDataset = new TestDatasetManager(methodTempDir, jobState,
                "datasetTargetPath/subDir", ImmutableList.of("a/c", "d/e"));
        nestedDataset.createDatasetFiles();

        outerDataset.verifyDoesntExist();
        nestedDataset.verifyDoesntExist();

        final Collection<? extends WorkUnitState> allWorkUnitStates =
                combine(outerDataset.getWorkUnitStates(), nestedDataset.getWorkUnitStates());
        publisher.publishData(allWorkUnitStates);

        outerDataset.verifyExists();
        nestedDataset.verifyExists();
    }

    /**
     * Publishes the combined work units of two datasets, only one of which has writer output staged,
     * and checks that the staged dataset's files appear at the target path while the other dataset's
     * files remain absent.
     */
    @Test
    @SuppressWarnings("unchecked")
    public void testPublishDatasetFailure() throws Exception {
        final String methodName = "testPublishDatasetFailure";

        final State jobState = getTestState(methodName);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/");

        final Path methodTempDir = new Path(testClassTempPath, methodName);
        final CopyDataPublisher publisher = closer.register(new CopyDataPublisher(jobState));

        final TestDatasetManager successDataset =
                new TestDatasetManager(methodTempDir, jobState, "successTargetPath", ImmutableList.of("a/b"));
        successDataset.createDatasetFiles();

        // createDatasetFiles() is never called for this dataset (presumably on purpose), so no
        // writer output exists for it and its files are expected to stay absent after publishing.
        final TestDatasetManager failedDataset =
                new TestDatasetManager(methodTempDir, jobState, "failedTargetPath", ImmutableList.of("c/d"));

        successDataset.verifyDoesntExist();
        failedDataset.verifyDoesntExist();

        final Collection<? extends WorkUnitState> allWorkUnitStates =
                combine(successDataset.getWorkUnitStates(), failedDataset.getWorkUnitStates());
        publisher.publishData(allWorkUnitStates);

        successDataset.verifyExists();
        failedDataset.verifyDoesntExist();
    }

    /**
     * Initializes the local file system and creates a fresh temp directory that all tests in this
     * class work under.
     *
     * @throws Exception if the local file system cannot be obtained or the temp directory cannot
     *         be created
     */
    @BeforeClass
    public void setup() throws Exception {
        fs = FileSystem.getLocal(new Configuration());
        testClassTempPath = new Path(Files.createTempDir().getAbsolutePath(), "CopyDataPublisherTest");
        fs.delete(testClassTempPath, true);
        // Fail fast instead of ignoring the result: every test depends on this directory existing.
        if (!fs.mkdirs(testClassTempPath)) {
            throw new IOException(
                    "Failed to create a temp directory for CopyDataPublisherTest at " + testClassTempPath);
        }
        // Log only after the directory has actually been created.
        log.info("Created a temp directory for CopyDataPublisherTest at " + testClassTempPath);
    }

    /**
     * Concatenates the given work unit state lists, in argument order, into one new list.
     *
     * @param workUnitStateLists the lists to concatenate
     * @return a new list holding every element of every input list
     */
    private static Collection<? extends WorkUnitState> combine(List<WorkUnitState>... workUnitStateLists) {
        final List<WorkUnitState> combined = Lists.newArrayList();
        for (int i = 0; i < workUnitStateLists.length; i++) {
            combined.addAll(workUnitStateLists[i]);
        }
        return combined;
    }

    /**
     * Builds the job state for a test method, rooted at this class's temp directory.
     *
     * @param testMethodName name of the test method, used as the sub-directory name
     * @return the state produced by {@link #getTestState(String, Path)}
     */
    private State getTestState(String testMethodName) {
        final Path classTempDir = this.testClassTempPath;
        return CopyDataPublisherTest.getTestState(testMethodName, classTempDir);
    }

    /**
     * Builds a {@link State} whose writer output and staging directories are
     * {@code <testClassTempPath>/<testMethodName>/task-output} and
     * {@code <testClassTempPath>/<testMethodName>/task-staging}, with the job id set to {@code "jobid"}.
     *
     * @param testMethodName name of the test method, used as the sub-directory name
     * @param testClassTempPath root temp directory of the calling test class
     * @return the populated state
     */
    public static State getTestState(String testMethodName, Path testClassTempPath) {
        final Path methodRoot = new Path(testClassTempPath, testMethodName);
        final Path taskOutputDir = new Path(methodRoot, "task-output");
        final Path taskStagingDir = new Path(methodRoot, "task-staging");

        final State testState = new State();
        testState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, taskOutputDir);
        testState.setProp(ConfigurationKeys.WRITER_STAGING_DIR, taskStagingDir);
        testState.setProp(ConfigurationKeys.JOB_ID_KEY, "jobid");
        return testState;
    }

    /**
     * Test helper that models one dataset: it stages empty files in the writer output directory,
     * produces work unit states carrying the dataset metadata and a copy entity, and asserts on the
     * presence or absence of the dataset's files under its target path.
     */
    public static class TestDatasetManager {

        private final CopyableDataset copyableDataset;
        private final CopyableDatasetMetadata metadata;
        private final List<String> relativeFilePaths;
        private final Path writerOutputPath;
        private final Path targetPath;
        private final FileSystem fs;
        private final CopyEntity copyEntity;

        /**
         * Creates a manager for a dataset and makes sure {@code testMethodTempPath} exists.
         *
         * @param testMethodTempPath temp directory of the calling test method
         * @param state job state; {@link ConfigurationKeys#WRITER_OUTPUT_DIR} must be set
         * @param datasetTargetPath target path of the dataset, resolved against {@code testMethodTempPath}
         * @param relativeFilePaths paths of the dataset's files, relative to the target path
         * @throws IOException if the local file system cannot be obtained or used
         */
        public TestDatasetManager(Path testMethodTempPath, State state, String datasetTargetPath,
                List<String> relativeFilePaths) throws IOException {

            this.fs = FileSystem.getLocal(new Configuration());
            this.copyableDataset = new TestCopyableDataset(new Path("origin"));
            this.metadata = new CopyableDatasetMetadata(this.copyableDataset);
            this.relativeFilePaths = relativeFilePaths;
            this.writerOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));

            this.targetPath = new Path(testMethodTempPath, datasetTargetPath);

            // The copy entity is built from fixed placeholder paths ("/file" -> "/destination");
            // the same entity is attached to every work unit state of this dataset.
            FileStatus file = new FileStatus(0, false, 0, 0, 0, new Path("/file"));
            this.copyEntity = CopyableFile.fromOriginAndDestination(this.fs, file, new Path("/destination"),
                    CopyConfiguration.builder(this.fs, state.getProperties())
                            .preserve(PreserveAttributes.fromMnemonicString("")).build())
                    .build();

            this.fs.mkdirs(testMethodTempPath);
            log.info("Created a temp directory for test at " + testMethodTempPath);

        }

        /**
         * Creates an empty file for every relative path under
         * {@code <writerOutputPath>/<dataset-and-partition identifier>/<targetPath without leading separator>}.
         *
         * @throws IOException if a directory or file cannot be created
         */
        private void createDatasetFiles() throws IOException {
            // Create writer output files
            Path datasetWriterOutputPath = new Path(writerOutputPath,
                    copyEntity.getDatasetAndPartition(this.metadata).identifier());
            Path outputPathWithCurrentDirectory = new Path(datasetWriterOutputPath,
                    PathUtils.withoutLeadingSeparator(this.targetPath));
            for (String path : relativeFilePaths) {
                Path pathToCreate = new Path(outputPathWithCurrentDirectory, path);
                fs.mkdirs(pathToCreate.getParent());
                // Close the returned stream right away; only the (empty) file itself is needed,
                // and leaving the stream open leaks a file handle per file.
                fs.create(pathToCreate).close();
            }
        }

        /**
         * Creates three work unit states, each serialized with this dataset's metadata and copy entity.
         *
         * @return the list of work unit states
         * @throws IOException if serialization fails
         */
        List<WorkUnitState> getWorkUnitStates() throws IOException {
            List<WorkUnitState> workUnitStates = Lists.newArrayList(new WorkUnitState(), new WorkUnitState(),
                    new WorkUnitState());
            for (WorkUnitState wus : workUnitStates) {
                CopySource.serializeCopyableDataset(wus, metadata);
                CopySource.serializeCopyEntity(wus, this.copyEntity);
            }
            return workUnitStates;
        }

        /**
         * Asserts that every file of this dataset exists under the target path.
         *
         * @throws IOException if the file system cannot be queried
         */
        void verifyExists() throws IOException {
            for (String fileRelativePath : relativeFilePaths) {
                Path filePublishPath = new Path(this.targetPath, fileRelativePath);
                Assert.assertTrue(fs.exists(filePublishPath), "Expected file to exist: " + filePublishPath);
            }
        }

        /**
         * Asserts that none of this dataset's files exist under the target path.
         *
         * @throws IOException if the file system cannot be queried
         */
        void verifyDoesntExist() throws IOException {
            for (String fileRelativePath : relativeFilePaths) {
                Path filePublishPath = new Path(this.targetPath, fileRelativePath);
                Assert.assertFalse(fs.exists(filePublishPath), "Expected file not to exist: " + filePublishPath);
            }
        }
    }

    /**
     * Closes every publisher registered with the closer and deletes the class-level temp directory.
     * Both steps are best-effort: a failure in one is logged and does not prevent the other.
     */
    @AfterClass
    public void cleanup() {
        try {
            closer.close();
        } catch (IOException e) {
            log.warn("Failed to close resources registered by CopyDataPublisherTest", e);
        }

        // Guard against setup() having failed before these fields were assigned.
        if (fs == null || testClassTempPath == null) {
            return;
        }
        try {
            fs.delete(testClassTempPath, true);
        } catch (IOException e) {
            log.warn("Failed to delete temp directory " + testClassTempPath, e);
        }
    }

}