gobblin.data.management.copy.RecursiveCopyableDatasetTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.copy.RecursiveCopyableDatasetTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import gobblin.commit.CommitStep;
import gobblin.configuration.ConfigurationKeys;
import gobblin.data.management.copy.entities.CommitStepCopyEntity;
import gobblin.util.commit.DeleteFileCommitStep;

import avro.shaded.com.google.common.base.Predicate;
import avro.shaded.com.google.common.collect.Iterables;
import javax.annotation.Nullable;
import lombok.Data;

public class RecursiveCopyableDatasetTest {

    /**
     * Two files exist under the source and a differently named file exists under the target. With no extra
     * options set, both source files are expected as copy entities and no deletions are expected.
     */
    @Test
    public void testSimpleCopy() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");
        Path sourceFile1 = new Path(source, "file1");
        Path sourceFile2 = new Path(source, "file2");

        List<FileStatus> sourceFiles =
                Lists.newArrayList(createFileStatus(source, "file1"), createFileStatus(source, "file2"));
        List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3"));

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        RecursiveCopyableDataset dataset =
                new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties);

        FileSystem targetFs = FileSystem.getLocal(new Configuration());
        CopyConfiguration copyConfiguration =
                CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build();
        Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, copyConfiguration);

        Assert.assertEquals(copyableFiles.size(), 2);

        ClassifiedFiles classified = classifyFiles(copyableFiles);
        Map<Path, Path> pathsToCopy = classified.getPathsToCopy();

        // Each source file must map to the same relative name under the target.
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile1));
        Assert.assertEquals(pathsToCopy.get(sourceFile1), new Path(target, "file1"));
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile2));
        Assert.assertEquals(pathsToCopy.get(sourceFile2), new Path(target, "file2"));

        Assert.assertEquals(classified.getPathsToDelete().size(), 0);
    }

    /**
     * file1 exists on both sides with the same length (1) and the same modification time (0), so only file2 is
     * expected to be copied and nothing is expected to be deleted.
     */
    @Test
    public void testCopyWithNonConflictingCollision() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");
        Path sourceFile2 = new Path(source, "file2");

        List<FileStatus> sourceFiles =
                Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2"));
        List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1));

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        RecursiveCopyableDataset dataset =
                new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties);

        FileSystem targetFs = FileSystem.getLocal(new Configuration());
        CopyConfiguration copyConfiguration =
                CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build();
        Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, copyConfiguration);

        // Only the file that is missing from the target should be scheduled.
        Assert.assertEquals(copyableFiles.size(), 1);

        ClassifiedFiles classified = classifyFiles(copyableFiles);
        Map<Path, Path> pathsToCopy = classified.getPathsToCopy();
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile2));
        Assert.assertEquals(pathsToCopy.get(sourceFile2), new Path(target, "file2"));

        Assert.assertEquals(classified.getPathsToDelete().size(), 0);
    }

    /**
     * file1 exists in both source and target with different lengths (1 vs. 2) and
     * {@code RecursiveCopyableDataset.UPDATE_KEY} is not set, so computing the copy entities is expected to fail
     * with an {@link IOException}.
     */
    @Test
    public void testCopyWithConflictingCollisionDueToSize() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");

        List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1),
                createFileStatus(source, "file2"));
        List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 2));

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles,
                targetFiles, properties);

        try {
            // The result is irrelevant: the call itself must throw.
            dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()),
                    CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build());
            Assert.fail("Expected an IOException because file1 has different lengths in source and target.");
        } catch (IOException expected) {
            // Expected: the length mismatch on file1 is a conflicting collision.
        }
    }

    /**
     * file1 exists in both source and target with the same length but the source copy has a later modification
     * time (10 vs. 9). {@code RecursiveCopyableDataset.UPDATE_KEY} is not set, so computing the copy entities is
     * expected to fail with an {@link IOException}.
     */
    @Test
    public void testCopyWithConflictingCollisionDueToModtime() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");

        List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1, 10),
                createFileStatus(source, "file2"));
        List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1, 9));

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles,
                targetFiles, properties);

        try {
            // The result is irrelevant: the call itself must throw.
            dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()),
                    CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build());
            Assert.fail("Expected an IOException because source file1 is newer than target file1.");
        } catch (IOException expected) {
            // Expected: the modification-time mismatch on file1 is a conflicting collision.
        }
    }

    /**
     * With {@code RecursiveCopyableDataset.UPDATE_KEY} set to "true", a length mismatch on file1 is expected to
     * produce three entities: copies of file1 and file2, plus a deletion of the existing target file1.
     */
    @Test
    public void testCopyWithUpdate() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");
        Path sourceFile1 = new Path(source, "file1");
        Path sourceFile2 = new Path(source, "file2");

        FileStatus targetFile1 = createFileStatus(target, "file1", 2);

        List<FileStatus> sourceFiles =
                Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2"));
        List<FileStatus> targetFiles = Lists.newArrayList(targetFile1);

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        properties.setProperty(RecursiveCopyableDataset.UPDATE_KEY, "true");
        RecursiveCopyableDataset dataset =
                new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties);

        // Spy on a local file system so that a status lookup for the target file1 returns the canned status.
        FileSystem targetFs = Mockito.spy(FileSystem.getLocal(new Configuration()));
        Mockito.doReturn(targetFile1).when(targetFs).getFileStatus(new Path(target, "file1"));

        CopyConfiguration copyConfiguration =
                CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build();
        Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, copyConfiguration);

        Assert.assertEquals(copyableFiles.size(), 3);

        ClassifiedFiles classified = classifyFiles(copyableFiles);
        Map<Path, Path> pathsToCopy = classified.getPathsToCopy();
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile1));
        Assert.assertEquals(pathsToCopy.get(sourceFile1), new Path(target, "file1"));
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile2));
        Assert.assertEquals(pathsToCopy.get(sourceFile2), new Path(target, "file2"));

        Set<Path> pathsToDelete = classified.getPathsToDelete();
        Assert.assertEquals(pathsToDelete.size(), 1);
        Assert.assertTrue(pathsToDelete.contains(new Path(target, "file1")));
    }

    /**
     * With {@code RecursiveCopyableDataset.DELETE_KEY} set to "true", target file3 (which has no counterpart in
     * the source) is expected to be scheduled for deletion alongside the copy of file1. The delete step is
     * expected to carry no parent deletion limit.
     */
    @Test
    public void testCopyWithDeleteTarget() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");
        Path sourceFile1 = new Path(source, "file1");

        List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1"));
        List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3"));

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        properties.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true");
        RecursiveCopyableDataset dataset =
                new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties);

        FileSystem targetFs = FileSystem.getLocal(new Configuration());
        CopyConfiguration copyConfiguration =
                CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build();
        Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, copyConfiguration);

        Assert.assertEquals(copyableFiles.size(), 2);

        ClassifiedFiles classified = classifyFiles(copyableFiles);
        Map<Path, Path> pathsToCopy = classified.getPathsToCopy();
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile1));
        Assert.assertEquals(pathsToCopy.get(sourceFile1), new Path(target, "file1"));

        Set<Path> pathsToDelete = classified.getPathsToDelete();
        Assert.assertEquals(pathsToDelete.size(), 1);
        Assert.assertTrue(pathsToDelete.contains(new Path(target, "file3")));

        // Pick the first commit-step entity and inspect its delete step.
        Predicate<CopyEntity> isCommitStepEntity = new Predicate<CopyEntity>() {
            @Override
            public boolean apply(@Nullable CopyEntity candidate) {
                return candidate instanceof CommitStepCopyEntity;
            }
        };
        CommitStepCopyEntity entity =
                (CommitStepCopyEntity) Iterables.filter(copyableFiles, isCommitStepEntity).iterator().next();
        DeleteFileCommitStep step = (DeleteFileCommitStep) entity.getStep();
        Assert.assertFalse(step.getParentDeletionLimit().isPresent());
    }

    /**
     * Same scenario as a plain delete of target file3, but with
     * {@code RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY} also set to "true". The delete step is then
     * expected to carry a parent deletion limit equal to the target root.
     */
    @Test
    public void testCopyWithDeleteTargetAndDeleteParentDirectories() throws Exception {
        Path source = new Path("/source");
        Path target = new Path("/target");
        Path sourceFile1 = new Path(source, "file1");

        List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1"));
        List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3"));

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());
        properties.setProperty(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, "true");
        properties.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true");
        RecursiveCopyableDataset dataset =
                new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties);

        FileSystem targetFs = FileSystem.getLocal(new Configuration());
        CopyConfiguration copyConfiguration =
                CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build();
        Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, copyConfiguration);

        Assert.assertEquals(copyableFiles.size(), 2);

        ClassifiedFiles classified = classifyFiles(copyableFiles);
        Map<Path, Path> pathsToCopy = classified.getPathsToCopy();
        Assert.assertTrue(pathsToCopy.containsKey(sourceFile1));
        Assert.assertEquals(pathsToCopy.get(sourceFile1), new Path(target, "file1"));

        Set<Path> pathsToDelete = classified.getPathsToDelete();
        Assert.assertEquals(pathsToDelete.size(), 1);
        Assert.assertTrue(pathsToDelete.contains(new Path(target, "file3")));

        // Pick the first commit-step entity and inspect its delete step.
        Predicate<CopyEntity> isCommitStepEntity = new Predicate<CopyEntity>() {
            @Override
            public boolean apply(@Nullable CopyEntity candidate) {
                return candidate instanceof CommitStepCopyEntity;
            }
        };
        CommitStepCopyEntity entity =
                (CommitStepCopyEntity) Iterables.filter(copyableFiles, isCommitStepEntity).iterator().next();
        DeleteFileCommitStep step = (DeleteFileCommitStep) entity.getStep();
        Assert.assertTrue(step.getParentDeletionLimit().isPresent());
        Assert.assertEquals(step.getParentDeletionLimit().get(), target);
    }

    /**
     * The dataset root is {@code /source/directory} but the glob is one level up ({@code /source/*} ). The file
     * under the root is expected to be copied to {@code /target/directory/file1}, i.e. the directory name
     * matched by the glob is kept under the publish directory.
     */
    @Test
    public void testCorrectComputationOfTargetPathsWhenUsingGlob() throws Exception {
        Path source = new Path("/source/directory");
        Path target = new Path("/target");

        List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1"));
        List<FileStatus> targetFiles = Lists.newArrayList();

        Properties properties = new Properties();
        properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());

        // Spy on a local file system so that a status lookup for the dataset root returns a directory status
        // without the directory having to exist on disk.
        FileSystem sourceUnderlyingFS = FileSystem.getLocal(new Configuration());
        FileSystem sourceFs = Mockito.spy(sourceUnderlyingFS);
        Mockito.doReturn(new FileStatus(0, true, 0, 0, 0, source)).when(sourceFs).getFileStatus(source);

        RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, new Path(target, "directory"),
                sourceFiles, targetFiles, properties, new Path("/source/*"), sourceFs);

        // Use the local file system explicitly, like the other tests in this class, rather than whatever
        // default file system the ambient Hadoop configuration happens to select.
        Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(
                FileSystem.getLocal(new Configuration()),
                CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build());

        Assert.assertEquals(copyableFiles.size(), 1);
        ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles);
        Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1")));
        Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")),
                new Path(target, "directory/file1"));
    }

    /**
     * Splits copy entities into files to copy and paths to delete.
     *
     * <p>Every {@link CopyableFile} contributes an origin-path to destination mapping. Every
     * {@link CommitStepCopyEntity} whose step is a {@link DeleteFileCommitStep} contributes the paths that step
     * lists for deletion. Any other entity or step type is ignored.
     *
     * @param copyEntities entities returned by a dataset
     * @return the classified copy and delete paths
     */
    private ClassifiedFiles classifyFiles(Collection<? extends CopyEntity> copyEntities) {
        Map<Path, Path> destinationsByOrigin = Maps.newHashMap();
        Set<Path> deletions = Sets.newHashSet();

        for (CopyEntity entity : copyEntities) {
            if (entity instanceof CopyableFile) {
                CopyableFile copyableFile = (CopyableFile) entity;
                destinationsByOrigin.put(copyableFile.getOrigin().getPath(), copyableFile.getDestination());
            }

            // The two checks are independent: an entity is tested for both roles.
            if (!(entity instanceof CommitStepCopyEntity)) {
                continue;
            }
            CommitStep step = ((CommitStepCopyEntity) entity).getStep();
            if (!(step instanceof DeleteFileCommitStep)) {
                continue;
            }
            for (FileStatus toDelete : ((DeleteFileCommitStep) step).getPathsToDelete()) {
                deletions.add(toDelete.getPath());
            }
        }

        return new ClassifiedFiles(destinationsByOrigin, deletions);
    }

    @Data
    private class ClassifiedFiles {
        private final Map<Path, Path> pathsToCopy;
        private final Set<Path> pathsToDelete;
    }

    /**
     * Creates a status for {@code relative} under {@code root} with length 0 and modification time 0.
     *
     * @param root parent path
     * @param relative file name relative to {@code root}
     * @return the file status
     */
    private FileStatus createFileStatus(Path root, String relative) {
        final long defaultLength = 0;
        final long defaultModtime = 0;
        return createFileStatus(root, relative, defaultLength, defaultModtime);
    }

    /**
     * Creates a status for {@code relative} under {@code root} with the given length and modification time 0.
     *
     * @param root parent path
     * @param relative file name relative to {@code root}
     * @param length file length to report
     * @return the file status
     */
    private FileStatus createFileStatus(Path root, String relative, long length) {
        final long defaultModtime = 0;
        return createFileStatus(root, relative, length, defaultModtime);
    }

    /**
     * Creates a non-directory status for {@code relative} under {@code root}; the two remaining numeric
     * constructor arguments are passed as 0.
     *
     * @param root parent path
     * @param relative file name relative to {@code root}
     * @param length file length to report
     * @param modtime modification time to report
     * @return the file status
     */
    private FileStatus createFileStatus(Path root, String relative, long length, long modtime) {
        Path filePath = new Path(root, relative);
        return new FileStatus(length, false, 0, 0, modtime, filePath);
    }

    /**
     * {@link RecursiveCopyableDataset} with canned file listings: listing the source path yields the supplied
     * source files and listing the target path yields the supplied target files. Any other path is rejected.
     */
    private static class TestRecursiveCopyableDataset extends RecursiveCopyableDataset {

        /** Path whose listing is answered with {@link #sourceFiles}. */
        private final Path source;
        /** Path whose listing is answered with {@link #targetFiles}. */
        private final Path target;
        private final List<FileStatus> sourceFiles;
        private final List<FileStatus> targetFiles;

        /**
         * Convenience constructor that uses {@code source} itself as the glob and a local file system as the
         * source file system.
         */
        public TestRecursiveCopyableDataset(Path source, Path target, List<FileStatus> sourceFiles,
                List<FileStatus> targetFiles, Properties properties) throws IOException {
            this(source, target, sourceFiles, targetFiles, properties, source,
                    FileSystem.getLocal(new Configuration()));
        }

        /**
         * @param source dataset root; also the path answered with {@code sourceFiles}
         * @param target path answered with {@code targetFiles}
         * @param sourceFiles listing to return for {@code source}
         * @param targetFiles listing to return for {@code target}
         * @param properties properties passed to the superclass
         * @param glob glob passed to the superclass
         * @param sourceFs source file system passed to the superclass
         */
        public TestRecursiveCopyableDataset(Path source, Path target, List<FileStatus> sourceFiles,
                List<FileStatus> targetFiles, Properties properties, Path glob, FileSystem sourceFs)
                throws IOException {
            super(sourceFs, source, properties, glob);
            this.sourceFiles = sourceFiles;
            this.targetFiles = targetFiles;
            this.source = source;
            this.target = target;
        }

        /**
         * Returns the canned listing for {@code path}, ignoring {@code fs} and {@code fileFilter}.
         *
         * @throws RuntimeException if {@code path} is neither the source nor the target path
         */
        @Override
        protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter)
                throws IOException {
            if (path.equals(this.source)) {
                return this.sourceFiles;
            }
            if (path.equals(this.target)) {
                return this.targetFiles;
            }
            throw new RuntimeException("Not a recognized path. " + path);
        }
    }
}