com.facebook.presto.hive.AbstractTestHiveClientS3.java Source code

Introduction

Here is the source code for com.facebook.presto.hive.AbstractTestHiveClientS3.java, an abstract TestNG base class that exercises the Presto Hive connector against S3-backed storage: reading records through splits, basic file system operations (getFileStatus, rename, delete), and table creation.
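
Because the class is abstract, running these tests requires a concrete subclass that supplies a Hive metastore endpoint, AWS credentials, and a writable S3 bucket to setup(). The sketch below is a minimal illustration of such a subclass; the class name and the system property names are hypothetical, not part of the Presto code base.

package com.facebook.presto.hive;

import org.testng.annotations.BeforeClass;

// Hypothetical subclass: reads connection details from system properties
// (the property names below are illustrative).
public class ExampleHiveClientS3Test extends AbstractTestHiveClientS3 {
    @BeforeClass
    public void initialize() {
        String metastoreHost = System.getProperty("test.hive.metastore.host", "localhost");
        int metastorePort = Integer.parseInt(System.getProperty("test.hive.metastore.port", "9083"));
        String databaseName = System.getProperty("test.hive.database", "default");
        String awsAccessKey = System.getProperty("test.hive.s3.access-key");
        String awsSecretKey = System.getProperty("test.hive.s3.secret-key");
        String writableBucket = System.getProperty("test.hive.s3.writable-bucket");

        // Wire up the connector components defined in the abstract base class.
        setup(metastoreHost, metastorePort, databaseName, awsAccessKey, awsSecretKey, writableBucket);
    }
}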

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.GroupByHashPageIndexerFactory;
import com.facebook.presto.hive.metastore.CachingHiveMetastore;
import com.facebook.presto.hive.metastore.HiveMetastoreClient;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
import com.facebook.presto.spi.ConnectorPageSink;
import com.facebook.presto.spi.ConnectorPageSinkProvider;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.ConnectorPageSourceProvider;
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorSplitManager;
import com.facebook.presto.spi.ConnectorSplitSource;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.ConnectorTableLayoutResult;
import com.facebook.presto.spi.ConnectorTableMetadata;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.testing.MaterializedResult;
import com.facebook.presto.testing.MaterializedRow;
import com.facebook.presto.type.TypeRegistry;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.net.HostAndPort;
import io.airlift.json.JsonCodec;
import io.airlift.slice.Slice;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ExecutorService;

import static com.facebook.presto.hadoop.HadoopFileStatus.isDirectory;
import static com.facebook.presto.hive.AbstractTestHiveClient.listAllDataPaths;
import static com.facebook.presto.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY;
import static com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY;
import static com.facebook.presto.hive.HiveTestUtils.DEFAULT_HIVE_DATA_STREAM_FACTORIES;
import static com.facebook.presto.hive.HiveTestUtils.DEFAULT_HIVE_RECORD_CURSOR_PROVIDER;
import static com.facebook.presto.hive.HiveTestUtils.SESSION;
import static com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER;
import static com.facebook.presto.hive.HiveTestUtils.getTypes;
import static com.facebook.presto.hive.util.Types.checkType;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.airlift.concurrent.Threads.daemonThreadsNamed;
import static io.airlift.testing.Assertions.assertEqualsIgnoreOrder;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.concurrent.Executors.newCachedThreadPool;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

@Test(groups = "hive-s3")
public abstract class AbstractTestHiveClientS3 {
    protected String writableBucket;

    protected String database;
    protected SchemaTableName tableS3;
    protected SchemaTableName temporaryCreateTable;

    protected HdfsEnvironment hdfsEnvironment;
    protected LocationService locationService;
    protected TestingHiveMetastore metastoreClient;
    protected HiveMetadata metadata;
    protected ConnectorSplitManager splitManager;
    protected ConnectorPageSinkProvider pageSinkProvider;
    protected ConnectorPageSourceProvider pageSourceProvider;

    private ExecutorService executor;

    @BeforeClass
    public void setUp() throws Exception {
        executor = newCachedThreadPool(daemonThreadsNamed("hive-%s"));
    }

    @AfterClass
    public void tearDown() throws Exception {
        if (executor != null) {
            executor.shutdownNow();
            executor = null;
        }
    }

    protected void setupHive(String databaseName) {
        database = databaseName;
        tableS3 = new SchemaTableName(database, "presto_test_s3");

        String random = UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", "");
        temporaryCreateTable = new SchemaTableName(database, "tmp_presto_test_create_s3_" + random);
    }

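    // Wires the Hive connector components (metastore, metadata, split manager,
    // page sink and page source providers) against a live Hive metastore and
    // an S3 bucket that the test can write to.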
    protected void setup(String host, int port, String databaseName, String awsAccessKey, String awsSecretKey,
            String writableBucket) {
        this.writableBucket = writableBucket;

        setupHive(databaseName);

        HiveClientConfig hiveClientConfig = new HiveClientConfig().setS3AwsAccessKey(awsAccessKey)
                .setS3AwsSecretKey(awsSecretKey);

        String proxy = System.getProperty("hive.metastore.thrift.client.socks-proxy");
        if (proxy != null) {
            hiveClientConfig.setMetastoreSocksProxy(HostAndPort.fromString(proxy));
        }

        HiveConnectorId connectorId = new HiveConnectorId("hive-test");
        HiveCluster hiveCluster = new TestingHiveCluster(hiveClientConfig, host, port);
        ExecutorService executor = newCachedThreadPool(daemonThreadsNamed("hive-s3-%s"));
        HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(
                new HdfsConfigurationUpdater(hiveClientConfig));
        HivePartitionManager hivePartitionManager = new HivePartitionManager(connectorId, hiveClientConfig);

        hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hiveClientConfig);
        // the location service depends on the metastore, so the metastore must be created first
        metastoreClient = new TestingHiveMetastore(hiveCluster, executor, hiveClientConfig, writableBucket,
                hdfsEnvironment);
        locationService = new HiveLocationService(metastoreClient, hdfsEnvironment);
        TypeRegistry typeManager = new TypeRegistry();
        JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
        metadata = new HiveMetadata(connectorId, hiveClientConfig, metastoreClient, hdfsEnvironment,
                hivePartitionManager, newDirectExecutorService(), typeManager, locationService,
                partitionUpdateCodec);
        splitManager = new HiveSplitManager(connectorId, hiveClientConfig, metastoreClient, new NamenodeStats(),
                hdfsEnvironment, new HadoopDirectoryLister(), executor);
        pageSinkProvider = new HivePageSinkProvider(hdfsEnvironment, metastoreClient,
                new GroupByHashPageIndexerFactory(), typeManager, new HiveClientConfig(), locationService,
                partitionUpdateCodec);
        pageSourceProvider = new HivePageSourceProvider(hiveClientConfig, hdfsEnvironment,
                DEFAULT_HIVE_RECORD_CURSOR_PROVIDER, DEFAULT_HIVE_DATA_STREAM_FACTORIES, TYPE_MANAGER);
    }

    @Test
    public void testGetRecordsS3() throws Exception {
        ConnectorTableHandle table = getTableHandle(tableS3);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(SESSION, table).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);

        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(SESSION, table,
                new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults)
                .getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(SESSION, layoutHandle);

        long sum = 0;

        for (ConnectorSplit split : getAllSplits(splitSource)) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(SESSION, split,
                    columnHandles)) {
                MaterializedResult result = materializeSourceDataStream(SESSION, pageSource,
                        getTypes(columnHandles));

                for (MaterializedRow row : result) {
                    sum += (Long) row.getField(columnIndex.get("t_bigint"));
                }
            }
        }
        assertEquals(sum, 78300);
    }

    @Test
    public void testGetFileStatus() throws Exception {
        Path basePath = new Path("s3://presto-test-hive/");
        Path tablePath = new Path(basePath, "presto_test_s3");
        Path filePath = new Path(tablePath, "test1.csv");
        FileSystem fs = hdfsEnvironment.getFileSystem(basePath);

        assertTrue(isDirectory(fs.getFileStatus(basePath)));
        assertTrue(isDirectory(fs.getFileStatus(tablePath)));
        assertFalse(isDirectory(fs.getFileStatus(filePath)));
        assertFalse(fs.exists(new Path(basePath, "foo")));
    }

    @Test
    public void testRename() throws Exception {
        Path basePath = new Path(format("s3://%s/rename/%s/", writableBucket, UUID.randomUUID()));
        FileSystem fs = hdfsEnvironment.getFileSystem(basePath);
        assertFalse(fs.exists(basePath));

        // create file foo.txt
        Path path = new Path(basePath, "foo.txt");
        assertTrue(fs.createNewFile(path));
        assertTrue(fs.exists(path));

        // rename foo.txt to bar.txt
        Path newPath = new Path(basePath, "bar.txt");
        assertFalse(fs.exists(newPath));
        assertTrue(fs.rename(path, newPath));
        assertFalse(fs.exists(path));
        assertTrue(fs.exists(newPath));

        // create file foo.txt and rename to bar.txt
        assertTrue(fs.createNewFile(path));
        assertFalse(fs.rename(path, newPath));
        assertTrue(fs.exists(path));

        // rename foo.txt to foo.txt
        assertTrue(fs.rename(path, path));
        assertTrue(fs.exists(path));

        // delete foo.txt
        assertTrue(fs.delete(path, false));
        assertFalse(fs.exists(path));

        // create directory source with file
        Path source = new Path(basePath, "source");
        assertTrue(fs.createNewFile(new Path(source, "test.txt")));

        // rename source to non-existing target
        Path target = new Path(basePath, "target");
        assertFalse(fs.exists(target));
        assertTrue(fs.rename(source, target));
        assertFalse(fs.exists(source));
        assertTrue(fs.exists(target));

        // create directory source with file
        assertTrue(fs.createNewFile(new Path(source, "test.txt")));

        // rename source to existing target
        assertTrue(fs.rename(source, target));
        assertFalse(fs.exists(source));
        target = new Path(target, "source");
        assertTrue(fs.exists(target));
        assertTrue(fs.exists(new Path(target, "test.txt")));

        // delete target
        target = new Path(basePath, "target");
        assertTrue(fs.exists(target));
        assertTrue(fs.delete(target, true));
        assertFalse(fs.exists(target));

        // cleanup
        fs.delete(basePath, true);
    }

    @Test
    public void testTableCreation() throws Exception {
        for (HiveStorageFormat storageFormat : HiveStorageFormat.values()) {
            try {
                doCreateTable(temporaryCreateTable, storageFormat, "presto_test");
            } finally {
                dropTable(temporaryCreateTable);
            }
        }
    }

    private void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat, String tableOwner)
            throws Exception {
        // begin creating the table
        List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
                .add(new ColumnMetadata("id", BIGINT, false)).build();

        Map<String, Object> properties = ImmutableMap.<String, Object>builder()
                .put(STORAGE_FORMAT_PROPERTY, storageFormat).put(PARTITIONED_BY_PROPERTY, ImmutableList.of())
                .build();
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, properties,
                tableOwner);
        HiveOutputTableHandle outputHandle = metadata.beginCreateTable(SESSION, tableMetadata);

        MaterializedResult data = MaterializedResult.resultBuilder(SESSION, BIGINT).row(1).row(3).row(2).build();

        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(SESSION, outputHandle);
        sink.appendPage(data.toPage(), null);
        Collection<Slice> fragments = sink.commit();

        // commit the table
        metadata.commitCreateTable(SESSION, outputHandle, fragments);

        // Hack to work around the metastore not being configured for S3.
        // The metastore tries to validate the location when creating the
        // table, which fails without explicit configuration for S3.
        // We work around that by using a dummy location when creating the
        // table and update it here to the correct S3 location.
        metastoreClient.updateTableLocation(database, tableName.getTableName(),
                locationService.writePath(outputHandle.getLocationHandle(), Optional.empty()).get().toString());

        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(tableName);
        List<ColumnHandle> columnHandles = ImmutableList
                .copyOf(metadata.getColumnHandles(SESSION, tableHandle).values());

        // verify the metadata
        tableMetadata = metadata.getTableMetadata(SESSION, getTableHandle(tableName));
        assertEquals(tableMetadata.getOwner(), tableOwner);
        assertEquals(tableMetadata.getColumns(), columns);

        // verify the data
        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(SESSION, tableHandle,
                new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults)
                .getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(SESSION, layoutHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(SESSION, split, columnHandles)) {
            MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
    }

    private void dropTable(SchemaTableName table) {
        try {
            metastoreClient.dropTable(table.getSchemaName(), table.getTableName());
        } catch (RuntimeException e) {
            // this usually occurs because the table was not created
        }
    }

    private ConnectorTableHandle getTableHandle(SchemaTableName tableName) {
        ConnectorTableHandle handle = metadata.getTableHandle(SESSION, tableName);
        checkArgument(handle != null, "table not found: %s", tableName);
        return handle;
    }

    private static List<ConnectorSplit> getAllSplits(ConnectorSplitSource source) throws InterruptedException {
        ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
        while (!source.isFinished()) {
            splits.addAll(getFutureValue(source.getNextBatch(1000)));
        }
        return splits.build();
    }

    private static ImmutableMap<String, Integer> indexColumns(List<ColumnHandle> columnHandles) {
        ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
        int i = 0;
        for (ColumnHandle columnHandle : columnHandles) {
            HiveColumnHandle hiveColumnHandle = checkType(columnHandle, HiveColumnHandle.class, "columnHandle");
            index.put(hiveColumnHandle.getName(), i);
            i++;
        }
        return index.build();
    }

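    // Metastore wrapper that works around a metastore not configured for S3:
    // tables are created with a placeholder location and the real S3 location
    // is set afterwards via updateTableLocation().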
    private static class TestingHiveMetastore extends CachingHiveMetastore {
        private final String writableBucket;
        private final HdfsEnvironment hdfsEnvironment;

        public TestingHiveMetastore(HiveCluster hiveCluster, ExecutorService executor,
                HiveClientConfig hiveClientConfig, String writableBucket, HdfsEnvironment hdfsEnvironment) {
            super(hiveCluster, executor, hiveClientConfig);
            this.writableBucket = writableBucket;
            this.hdfsEnvironment = hdfsEnvironment;
        }

        @Override
        public Optional<Database> getDatabase(String databaseName) {
            Optional<Database> database = super.getDatabase(databaseName);
            if (database.isPresent()) {
                database.get().setLocationUri("s3://" + writableBucket + "/");
            }
            return database;
        }

        @Override
        public void createTable(Table table) {
            // hack to work around the metastore not being configured for S3
            table.getSd().setLocation("/");
            super.createTable(table);
        }

        @Override
        public void dropTable(String databaseName, String tableName) {
            try {
                Optional<Table> table = getTable(databaseName, tableName);
                if (!table.isPresent()) {
                    throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
                }

                // hack to work around the metastore not being configured for S3
                List<String> locations = listAllDataPaths(this, databaseName, tableName);
                table.get().getSd().setLocation("/");

                // drop table
                try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) {
                    client.alterTable(databaseName, tableName, table.get());
                    client.dropTable(databaseName, tableName, false);
                }

                // drop data
                for (String location : locations) {
                    Path path = new Path(location);
                    hdfsEnvironment.getFileSystem(path).delete(path, true);
                }
            } catch (Exception e) {
                throw Throwables.propagate(e);
            } finally {
                invalidateTable(databaseName, tableName);
            }
        }

        public void updateTableLocation(String databaseName, String tableName, String location) {
            try {
                Optional<Table> table = getTable(databaseName, tableName);
                if (!table.isPresent()) {
                    throw new TableNotFoundException(new SchemaTableName(databaseName, tableName));
                }
                table.get().getSd().setLocation(location);
                try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) {
                    client.alterTable(databaseName, tableName, table.get());
                }
            } catch (TException e) {
                throw Throwables.propagate(e);
            }
        }
    }
}