gobblin.data.management.retention.integration.HiveRetentionTest.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.data.management.retention.integration.HiveRetentionTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.data.management.retention.integration;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsInAnyOrder;

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.joda.time.DateTimeUtils;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.io.Files;

import gobblin.configuration.ConfigurationKeys;
import gobblin.data.management.conversion.hive.LocalHiveMetastoreTestUtils;
import gobblin.util.PathUtils;
import gobblin.util.test.RetentionTestDataGenerator.FixedThreadLocalMillisProvider;
import gobblin.util.test.RetentionTestHelper;

/**
 * Integration tests for time-based retention of Hive partitions.
 *
 * <p>Every scenario runs against the metastore client exposed by {@link LocalHiveMetastoreTestUtils} and a
 * scratch directory on the local file system. A scenario pins the Joda-Time clock to a fixed instant,
 * creates a table to purge plus a sibling {@code _source} table (four {@code datepartition} partitions each),
 * runs retention through {@code RetentionTestHelper.clean} and finally verifies both the metastore contents
 * and the partition directories on disk.
 *
 * <p>The retention policy itself lives in the config resources under {@code testHiveTimeBasedRetention};
 * those files are not part of this class, so the expectations below describe what the configs are assumed
 * to do (purge the two oldest partitions, optionally replacing them from the {@code _source} table).
 */
@Test(groups = { "SystemTimeTests" })
public class HiveRetentionTest {

    /** Parses {@code yyyy-MM-dd-HH} timestamps in the zone named by {@code ConfigurationKeys.PST_TIMEZONE_NAME}. */
    private static final DateTimeFormatter FORMATTER = DateTimeFormat.forPattern("yyyy-MM-dd-HH")
            .withZone(DateTimeZone.forID(ConfigurationKeys.PST_TIMEZONE_NAME));

    /** Name of the single partition column of every table created by these tests. */
    private static final String PARTITION_COLUMN = "datepartition";

    /** Upper bound handed to {@code listPartitions}; comfortably above the four partitions per table. */
    private static final short MAX_LISTED_PARTITIONS = 10;

    /** Resource sub-directory that holds the retention configs and job properties for these tests. */
    private static final String CONFIG_RESOURCE_DIR = "testHiveTimeBasedRetention";

    private FileSystem fs;
    private Path testTempPath;
    private LocalHiveMetastoreTestUtils hiveMetastoreTestUtils;

    /**
     * Obtains the local metastore helper and the local file system, and creates the scratch directory
     * {@code <fresh temp dir>/HiveRetentionTest} under which all table locations are placed.
     *
     * @throws Exception if the metastore helper, the file system or the scratch directory cannot be set up
     */
    @BeforeClass
    public void setup() throws Exception {
        this.hiveMetastoreTestUtils = LocalHiveMetastoreTestUtils.getInstance();
        this.fs = FileSystem.getLocal(new Configuration());
        this.testTempPath = new Path(Files.createTempDir().getAbsolutePath(), "HiveRetentionTest");
        this.fs.mkdirs(this.testTempPath);
    }

    /**
     * Runs one retention scenario end to end.
     *
     * @param purgedDbName database holding the table that retention is run against; dropped and recreated
     * @param purgedTableName name of the table that retention is run against
     * @param configFileName retention config file inside the {@code testHiveTimeBasedRetention} resources
     * @param isReplacementTest {@code true} if the purged partitions are expected to be re-registered pointing
     *        at the {@code _source} table's partition locations; {@code false} if they are expected to be dropped
     * @throws Exception on any metastore, file system or retention failure
     */
    private void testTimeBasedHiveRetention(String purgedDbName, String purgedTableName, String configFileName,
            boolean isReplacementTest) throws Exception {

        try {
            // Freeze Joda-Time "now" so the outcome does not depend on when the test runs.
            DateTimeUtils.setCurrentMillisProvider(
                    new FixedThreadLocalMillisProvider(FORMATTER.parseDateTime("2016-01-10-00").getMillis()));

            // Setup db, table to purge. Creating 4 partitions. 2 will be deleted and 2 will be retained
            String purgedTableSdLoc = new Path(this.testTempPath, purgedDbName + purgedTableName).toString();
            final Table purgedTbl = recreateTestTable(purgedDbName, purgedTableName, purgedTableSdLoc);

            // Setup db, table and partitions to act as replacement partitions source
            String replacementSourceTableSdLoc =
                    new Path(this.testTempPath, purgedDbName + purgedTableName + "_source").toString();
            String replacementDbName = purgedDbName + "_source";
            String replacementTableName = purgedTableName + "_source";
            final Table replacementTbl =
                    recreateTestTable(replacementDbName, replacementTableName, replacementSourceTableSdLoc);

            String deleted1 = "2016-01-01-00";
            String deleted2 = "2016-01-02-02";

            String retained1 = "2016-01-03-04";
            String retained2 = "2016-01-07-06";

            // Create partitions (and their directories) in table being purged
            Partition pDeleted1 = addPartitionWithDirectory(purgedTbl, deleted1);
            Partition pDeleted2 = addPartitionWithDirectory(purgedTbl, deleted2);
            Partition pRetained1 = addPartitionWithDirectory(purgedTbl, retained1);
            Partition pRetained2 = addPartitionWithDirectory(purgedTbl, retained2);

            // Create partitions (and their directories) in table that is replacement source
            Partition rReplaced1 = addPartitionWithDirectory(replacementTbl, deleted1);
            Partition rReplaced2 = addPartitionWithDirectory(replacementTbl, deleted2);
            Partition rUntouched1 = addPartitionWithDirectory(replacementTbl, retained1);
            Partition rUntouched2 = addPartitionWithDirectory(replacementTbl, retained2);

            // Sanity check the fixture before retention touches anything
            Assert.assertEquals(listPartitions(purgedDbName, purgedTableName).size(), 4);
            Assert.assertEquals(listPartitions(replacementDbName, replacementTableName).size(), 4);

            // Run retention
            RetentionTestHelper.clean(this.fs,
                    PathUtils.combinePaths(RetentionIntegrationTest.TEST_PACKAGE_RESOURCE_NAME,
                            CONFIG_RESOURCE_DIR, configFileName),
                    Optional.of(PathUtils.combinePaths(RetentionIntegrationTest.TEST_PACKAGE_RESOURCE_NAME,
                            CONFIG_RESOURCE_DIR, "jobProps.properties")),
                    this.testTempPath);

            List<Partition> pPartitions = listPartitions(purgedDbName, purgedTableName);
            String[] expectedRetainedPartitions;
            if (isReplacementTest) {
                // If replacement test, 2 partitions must be replaced - hence total count must be 4
                Assert.assertEquals(pPartitions.size(), 4);
                expectedRetainedPartitions = new String[] { partitionName(purgedTbl, pRetained1),
                        partitionName(purgedTbl, pRetained2),
                        partitionName(purgedTbl, pDeleted1),
                        partitionName(purgedTbl, pDeleted2) };
            } else {
                // If not a replacement test, 2 partitions must be purged
                Assert.assertEquals(pPartitions.size(), 2);
                expectedRetainedPartitions = new String[] { partitionName(purgedTbl, pRetained1),
                        partitionName(purgedTbl, pRetained2) };
            }

            // Check if all available partitions are that which are expected
            assertThat(FluentIterable.from(pPartitions).transform(new Function<Partition, String>() {
                @Override
                public String apply(Partition input) {
                    return partitionName(purgedTbl, input);
                }
            }).toList(), containsInAnyOrder(expectedRetainedPartitions));

            // Check that replaced partitions are pointing to correct physical location.
            // Presence of both names is already guaranteed by the containsInAnyOrder check above.
            if (isReplacementTest) {
                String replacedName1 = partitionName(purgedTbl, pDeleted1);
                String replacedName2 = partitionName(purgedTbl, pDeleted2);
                for (Partition partition : pPartitions) {
                    String name = partitionName(purgedTbl, partition);
                    if (name.equalsIgnoreCase(replacedName1)) {
                        Assert.assertEquals(partition.getSd().getLocation(), rReplaced1.getSd().getLocation(),
                                "Replaced partition location not updated.");
                    }
                    if (name.equalsIgnoreCase(replacedName2)) {
                        Assert.assertEquals(partition.getSd().getLocation(), rReplaced2.getSd().getLocation(),
                                "Replaced partition location not updated.");
                    }
                }
            }

            // Irrespective of whether it is a replacement test, purged partition directories must be deleted
            assertDirectoryExists(pRetained1);
            assertDirectoryExists(pRetained2);
            assertDirectoryMissing(pDeleted1);
            assertDirectoryMissing(pDeleted2);

            // Replacement source partition directories must be left untouched
            assertDirectoryExists(rReplaced1);
            assertDirectoryExists(rReplaced2);
            assertDirectoryExists(rUntouched1);
            assertDirectoryExists(rUntouched2);

        } finally {
            // Always hand the clock back, even when an assertion above fails.
            DateTimeUtils.setCurrentMillisSystem();
        }

    }

    /** Drops {@code dbName} if present and creates a fresh single-column partitioned test table in it. */
    private Table recreateTestTable(String dbName, String tableName, String location) throws Exception {
        this.hiveMetastoreTestUtils.dropDatabaseIfExists(dbName);
        return this.hiveMetastoreTestUtils.createTestTable(dbName, tableName, location,
                ImmutableList.of(PARTITION_COLUMN), false);
    }

    /** Registers a partition with the given value on {@code table} and creates its storage directory. */
    private Partition addPartitionWithDirectory(Table table, String partitionValue) throws Exception {
        Partition partition = this.hiveMetastoreTestUtils.addTestPartition(table,
                ImmutableList.of(partitionValue), currentEpochSeconds());
        this.fs.mkdirs(new Path(partition.getSd().getLocation()));
        return partition;
    }

    /**
     * Returns the current wall-clock time as whole seconds since the epoch.
     *
     * <p>Casting {@code System.currentTimeMillis()} straight to {@code int} keeps only the low 32 bits and
     * yields an arbitrary, possibly negative value. Hive stores a partition's create time as an {@code int}
     * of epoch seconds, so seconds are what is passed on. NOTE(review): assumes the helper's {@code int}
     * argument ends up as the partition create time - confirm against LocalHiveMetastoreTestUtils.
     */
    private static int currentEpochSeconds() {
        return (int) (System.currentTimeMillis() / 1000L);
    }

    /** Lists up to {@link #MAX_LISTED_PARTITIONS} partitions of the given table from the local metastore. */
    private List<Partition> listPartitions(String dbName, String tableName) throws Exception {
        return this.hiveMetastoreTestUtils.getLocalMetastoreClient().listPartitions(dbName, tableName,
                MAX_LISTED_PARTITIONS);
    }

    /** Asserts that the storage directory of {@code partition} exists on the test file system. */
    private void assertDirectoryExists(Partition partition) throws Exception {
        String location = partition.getSd().getLocation();
        Assert.assertTrue(this.fs.exists(new Path(location)), "Expected directory to exist: " + location);
    }

    /** Asserts that the storage directory of {@code partition} is absent from the test file system. */
    private void assertDirectoryMissing(Partition partition) throws Exception {
        String location = partition.getSd().getLocation();
        Assert.assertFalse(this.fs.exists(new Path(location)), "Expected directory to be deleted: " + location);
    }

    /**
     * Replacement scenario driven by {@code replacement.conf}. Runs after the config-store selection
     * scenario, whose database name it shares.
     */
    @Test(dependsOnMethods = { "testTimeBasedHiveRetentionWithConfigStore" })
    public void testTimeBasedHivePartitionReplacementWithConfigStore() throws Exception {
        String dbName = "hiveTestDbConfigStore";
        String tableName = "testTable";
        this.testTimeBasedHiveRetention(dbName, tableName, "replacement.conf", true);
    }

    /** Purge-only scenario driven by {@code selection.conf}. Runs after the job-props scenario. */
    @Test(dependsOnMethods = { "testTimeBasedHiveRetentionWithJobProps" })
    public void testTimeBasedHiveRetentionWithConfigStore() throws Exception {
        String dbName = "hiveTestDbConfigStore";
        String tableName = "testTable";
        this.testTimeBasedHiveRetention(dbName, tableName, "selection.conf", false);
    }

    /** Purge-only scenario driven by {@code hive-retention.job}. */
    @Test
    public void testTimeBasedHiveRetentionWithJobProps() throws Exception {
        String dbName = "hiveTestDb";
        String tableName = "testTable";
        this.testTimeBasedHiveRetention(dbName, tableName, "hive-retention.job", false);
    }

    /** Returns the name that Hive's query-layer {@code Partition} wrapper reports for {@code partition}. */
    private static String partitionName(Table table, Partition partition) {
        return getQlPartition(table, partition).getName();
    }

    /**
     * Wraps metastore-API table and partition objects in their {@code org.apache.hadoop.hive.ql.metadata}
     * counterparts.
     *
     * @throws RuntimeException wrapping the {@link HiveException} if the wrapper cannot be constructed
     */
    private static org.apache.hadoop.hive.ql.metadata.Partition getQlPartition(final Table table,
            final Partition partition) {
        try {
            return new org.apache.hadoop.hive.ql.metadata.Partition(
                    new org.apache.hadoop.hive.ql.metadata.Table(table), partition);
        } catch (HiveException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Removes the scratch area created by {@link #setup()}. Failures are tolerated because a leftover
     * temp directory must not turn an otherwise green test run red.
     */
    @AfterClass
    public void cleanUp() {
        if (this.fs == null || this.testTempPath == null) {
            // setup() failed before the scratch directory existed; nothing to remove.
            return;
        }
        try {
            // testTempPath is a child of the unique directory made by Files.createTempDir();
            // delete that parent so the whole temp tree goes away instead of leaving an empty directory.
            this.fs.delete(this.testTempPath.getParent(), true);
        } catch (Exception ignored) {
            // Best-effort cleanup: the directory lives under the system temp location anyway.
        }
    }

}