org.apache.falcon.catalog.CatalogPartitionHandlerTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.falcon.catalog.CatalogPartitionHandlerTest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.catalog;

import org.apache.commons.lang.RandomStringUtils;
import org.apache.falcon.cluster.util.EmbeddedCluster;
import org.apache.falcon.entity.AbstractTestBase;
import org.apache.falcon.entity.ClusterHelper;
import org.apache.falcon.entity.v0.EntityType;
import org.apache.falcon.entity.v0.cluster.Cluster;
import org.apache.falcon.entity.v0.feed.Feed;
import org.apache.falcon.entity.v0.feed.Properties;
import org.apache.falcon.entity.v0.feed.Property;
import org.apache.falcon.util.HiveTestUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore;
import org.apache.hcatalog.api.HCatPartition;
import org.apache.hcatalog.common.HCatException;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import java.io.File;
import java.util.Arrays;
import java.util.List;

/**
 * Tests plugin that registers partition on succeeded message.
 */
public class CatalogPartitionHandlerTest extends AbstractTestBase {
    private static final int METASTORE_PORT = 49083;
    private static final String CATALOG_DB = "default";
    public static final Path DATA_PATH = new Path("/projects/falcon/clicks/2014/06/18/18");
    public static final String CATALOG_TABLE = "clicks";

    private Thread hcatServer;
    private EmbeddedCluster embeddedCluster;
    private String metastoreUrl;
    private CatalogPartitionHandler partHandler;

    @BeforeClass
    public void setup() throws Exception {
        embeddedCluster = EmbeddedCluster.newCluster("testCluster");
        final String fsUrl = ClusterHelper.getStorageUrl(embeddedCluster.getCluster());

        hcatServer = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    HiveConf hiveconf = new HiveConf();
                    hiveconf.set("hive.metastore.warehouse.dir", new File("target/metastore").getAbsolutePath());
                    hiveconf.set("fs.default.name", fsUrl);
                    HiveMetaStore.startMetaStore(METASTORE_PORT, null, hiveconf);
                } catch (Throwable t) {
                    throw new RuntimeException(t);
                }
            }
        });
        hcatServer.start();
        metastoreUrl = ClusterHelper.getRegistryEndPoint(embeddedCluster.getCluster());
        HiveTestUtils.createDatabase(metastoreUrl, "default");
        partHandler = CatalogPartitionHandler.get();
    }

    @BeforeMethod
    public void prepare() throws Exception {
        cleanupStore();
        HiveTestUtils.dropTable(metastoreUrl, CATALOG_DB, CATALOG_TABLE);
        FileSystem fs = embeddedCluster.getFileSystem();
        fs.delete(DATA_PATH, true);

        Cluster cluster = embeddedCluster.getCluster();
        store.publish(EntityType.CLUSTER, cluster);
    }

    @AfterClass
    public void cleanup() {
        hcatServer.stop();
    }

    private Feed createFeed(boolean clearPartitions) throws Exception {
        Feed feed = (Feed) storeEntity(EntityType.FEED, "feed" + RandomStringUtils.randomAlphanumeric(10));
        if (clearPartitions) {
            feed.setPartitions(null);
            HiveTestUtils.createExternalTable(metastoreUrl, CATALOG_DB, CATALOG_TABLE, Arrays.asList("ds"),
                    "/projects/falcon/clicks");
        } else {
            HiveTestUtils.createExternalTable(metastoreUrl, CATALOG_DB, CATALOG_TABLE,
                    Arrays.asList("ds", "country", "region"), "/projects/falcon/clicks");
        }
        feed.setProperties(getProperties(CatalogPartitionHandler.CATALOG_TABLE,
                "catalog:default:clicks#ds={YEAR}-{MONTH}-{DAY}-{HOUR}"));
        return feed;
    }

    @Test
    public void testStaticPartitions() throws Exception {
        String clusterName = embeddedCluster.getCluster().getName();
        String feedName = createFeed(true).getName();

        //no partition if data path doesn't exist
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        try {
            HiveTestUtils.getPartition(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds", "2014-06-18-18");
            Assert.fail("Expected exception!");
        } catch (HCatException e) {
            //expected
        }

        //success case
        FileSystem fs = embeddedCluster.getFileSystem();
        fs.mkdirs(DATA_PATH);
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        HCatPartition partition = HiveTestUtils.getPartition(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds",
                "2014-06-18-18");
        Assert.assertNotNull(partition);
        Assert.assertEquals(new Path(partition.getLocation()).toUri().getPath(), DATA_PATH.toString());

        //re-run scenario
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        HCatPartition newPartition = HiveTestUtils.getPartition(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds",
                "2014-06-18-18");
        Assert.assertNotNull(newPartition);
        //validate that its the same old partition updated
        Assert.assertEquals(newPartition.getCreateTime(), partition.getCreateTime());
        Assert.assertEquals(new Path(newPartition.getLocation()).toUri().getPath(), DATA_PATH.toString());
    }

    @Test
    public void testEviction() throws Exception {
        String clusterName = embeddedCluster.getCluster().getName();
        String feedName = createFeed(true).getName();

        //add partition
        FileSystem fs = embeddedCluster.getFileSystem();
        fs.mkdirs(DATA_PATH);
        fs.mkdirs(new Path(DATA_PATH, "US"));
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        HCatPartition partition = HiveTestUtils.getPartition(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds",
                "2014-06-18-18");
        Assert.assertNotNull(partition);

        //drop partition
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), true);
        try {
            HiveTestUtils.getPartition(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds", "2014-06-18-18");
            Assert.fail("expected HCatException");
        } catch (HCatException expected) {
            //expected
        }
    }

    @Test
    public void testDynamicPartitions() throws Exception {
        String clusterName = embeddedCluster.getCluster().getName();
        String feedName = createFeed(false).getName();
        FileSystem fs = embeddedCluster.getFileSystem();

        //no dynamic parts, partition should be registered with *
        fs.mkdirs(DATA_PATH);
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        List<HCatPartition> partitions = HiveTestUtils.getPartitions(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds",
                "2014-06-18-18");
        Assert.assertNotNull(partitions);
        Assert.assertEquals(partitions.size(), 1);
        HCatPartition part = partitions.get(0);
        Assert.assertTrue(part.getValues().equals(Arrays.asList("2014-06-18-18", "NODATA", "NODATA")));

        //success case
        fs.mkdirs(new Path(DATA_PATH, "US/CA"));
        fs.mkdirs(new Path(DATA_PATH, "IND/KA"));
        //Temporary files should not be considered
        fs.mkdirs(new Path(DATA_PATH, "IND/.log"));
        fs.create(new Path(DATA_PATH, "IND/_SUCCESS")).close();
        fs.create(new Path(DATA_PATH, "IND/part-file")).close();
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        partitions = HiveTestUtils.getPartitions(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds", "2014-06-18-18");
        Assert.assertNotNull(partitions);
        Assert.assertEquals(partitions.size(), 2);

        HCatPartition oldPart = partitions.get(0);
        Assert.assertTrue(oldPart.getValues().equals(Arrays.asList("2014-06-18-18", "IND", "KA")));
        Assert.assertEquals(new Path(oldPart.getLocation()).toUri().getPath(),
                new Path(DATA_PATH, "IND/KA").toString());

        Assert.assertTrue(partitions.get(1).getValues().equals(Arrays.asList("2014-06-18-18", "US", "CA")));
        Assert.assertEquals(new Path(partitions.get(1).getLocation()).toUri().getPath(),
                new Path(DATA_PATH, "US/CA").toString());

        //re-run scenario
        fs.delete(DATA_PATH, true);
        fs.mkdirs(new Path(DATA_PATH, "IND/TN"));
        fs.mkdirs(new Path(DATA_PATH, "IND/KA"));
        partHandler.handlePartition(clusterName, feedName, DATA_PATH.toString(), false);
        partitions = HiveTestUtils.getPartitions(metastoreUrl, CATALOG_DB, CATALOG_TABLE, "ds", "2014-06-18-18");
        Assert.assertNotNull(partitions);
        Assert.assertEquals(partitions.size(), 2);

        HCatPartition newPart = partitions.get(0);
        Assert.assertEquals(newPart.getValues(), Arrays.asList("2014-06-18-18", "IND", "KA"));
        Assert.assertEquals(new Path(newPart.getLocation()).toUri().getPath(),
                new Path(DATA_PATH, "IND/KA").toString());
        Assert.assertEquals(newPart.getParameters().get(CatalogPartitionHandler.CREATE_TIME),
                oldPart.getParameters().get(CatalogPartitionHandler.CREATE_TIME)); //same partition
        Assert.assertTrue(Long.valueOf(newPart.getParameters().get(CatalogPartitionHandler.UPDATE_TIME)) > Long
                .valueOf(oldPart.getParameters().get(CatalogPartitionHandler.CREATE_TIME)));

        Assert.assertEquals(partitions.get(1).getValues(), Arrays.asList("2014-06-18-18", "IND", "TN"));
        Assert.assertEquals(new Path(partitions.get(1).getLocation()).toUri().getPath(),
                new Path(DATA_PATH, "IND/TN").toString());
    }

    private Properties getProperties(String name, String value) {
        Properties props = new Properties();
        Property prop = new Property();
        prop.setName(name);
        prop.setValue(value);
        props.getProperties().add(prop);
        return props;
    }
}