com.uber.hoodie.hive.client.HoodieHiveClient.java Source code

Introduction

Here is the source code for com.uber.hoodie.hive.client.HoodieHiveClient.java. The class is a small Hive client used by Hoodie: it lazily opens a JDBC connection to Hive, checks whether tables exist and reads their schemas through the JDBC metadata API, lists partitions through the Hive metastore client, and issues CREATE/ALTER TABLE statements to register tables and to add or relocate partitions. A brief usage sketch follows the listing.

Source

/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.hive.client;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.uber.hoodie.hive.HoodieHiveConfiguration;
import com.uber.hoodie.hive.HoodieHiveDatasetException;
import com.uber.hoodie.hive.PartitionStrategy;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.SchemaDifference;
import com.uber.hoodie.hive.model.StoragePartition;
import com.uber.hoodie.hive.model.TablePartition;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import parquet.schema.MessageType;

import javax.sql.DataSource;
import java.io.Closeable;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Client to access Hive over JDBC and the metastore client, used to keep Hoodie dataset
 * table definitions and partitions in sync with storage
 */
public class HoodieHiveClient implements Closeable {
    private static final Logger LOG = LoggerFactory.getLogger(HoodieHiveClient.class);
    private static final String driverName = "org.apache.hive.jdbc.HiveDriver";

    static {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e);
        }
    }

    private final HoodieHiveConfiguration configuration;
    private Connection connection;
    private HiveConf hiveConf;

    public HoodieHiveClient(HoodieHiveConfiguration configuration) {
        this.configuration = configuration;
        this.hiveConf = new HiveConf();
        this.hiveConf.addResource(configuration.getConfiguration());
        try {
            this.connection = getConnection();
        } catch (SQLException e) {
            throw new HoodieHiveDatasetException("Failed to connect to hive metastore ", e);
        }
    }

    /**
     * Scan all the partitions registered in the Hive metastore for the given {@link HoodieDatasetReference}
     *
     * @param metadata reference to the dataset (database and table name)
     * @return list of partitions currently registered for the table
     */
    public List<TablePartition> scanPartitions(HoodieDatasetReference metadata) {
        if (!checkTableExists(metadata)) {
            throw new IllegalArgumentException(
                    "Failed to scan partitions as table " + metadata.getDatabaseTableName() + " does not exist");
        }
        List<TablePartition> partitions = Lists.newArrayList();
        HiveMetaStoreClient client = null;
        try {
            client = new HiveMetaStoreClient(hiveConf);
            List<Partition> hivePartitions = client.listPartitions(metadata.getDatabaseName(),
                    metadata.getTableName(), (short) -1);
            for (Partition partition : hivePartitions) {
                partitions.add(new TablePartition(metadata, partition));
            }
            return partitions;
        } catch (Exception e) {
            throw new HoodieHiveDatasetException("Failed to scan partitions for " + metadata, e);
        } finally {
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Check if the table exists in the Hive metastore
     *
     * @param metadata reference to the dataset (database and table name)
     * @return true if the table exists
     */
    public boolean checkTableExists(HoodieDatasetReference metadata) {
        ResultSet resultSet = null;
        try {
            Connection conn = getConnection();
            resultSet = conn.getMetaData().getTables(null, metadata.getDatabaseName(), metadata.getTableName(),
                    null);
            return resultSet.next();
        } catch (SQLException e) {
            throw new HoodieHiveDatasetException("Failed to check if table exists " + metadata, e);
        } finally {
            closeQuietly(resultSet, null);
        }
    }

    /**
     * Update the Hive table pointed to by {@link HoodieDatasetReference} so that its columns
     * match the new storage schema (typically derived from a {@link SchemaDifference})
     *
     * @param metadata                reference to the dataset (database and table name)
     * @param hivePartitionFieldNames Hive partition field names, used to decide whether to cascade
     * @param newSchema               new parquet schema to apply
     * @return result of executing the generated ALTER TABLE ... REPLACE COLUMNS statement
     */
    public boolean updateTableDefinition(HoodieDatasetReference metadata, String[] hivePartitionFieldNames,
            MessageType newSchema) {
        try {
            String newSchemaStr = SchemaUtil.generateSchemaString(newSchema);
            // Cascade clause should not be present for non-partitioned tables
            String cascadeClause = hivePartitionFieldNames.length > 0 ? " cascade" : "";
            StringBuilder sqlBuilder = new StringBuilder("ALTER TABLE ").append("`")
                    .append(metadata.getDatabaseTableName()).append("`").append(" REPLACE COLUMNS(")
                    .append(newSchemaStr).append(" )").append(cascadeClause);
            LOG.info("Creating table with " + sqlBuilder);
            return updateHiveSQL(sqlBuilder.toString());
        } catch (IOException e) {
            throw new HoodieHiveDatasetException("Failed to update table for " + metadata, e);
        }
    }

    /**
     * Execute an update (DDL) statement against Hive with this SQL
     *
     * @param s SQL to execute
     * @return value returned by {@link Statement#execute(String)}
     */
    public boolean updateHiveSQL(String s) {
        Statement stmt = null;
        try {
            Connection conn = getConnection();
            stmt = conn.createStatement();
            LOG.info("Executing SQL " + s);
            return stmt.execute(s);
        } catch (SQLException e) {
            throw new HoodieHiveDatasetException("Failed in executing SQL " + s, e);
        } finally {
            closeQuietly(null, stmt);
        }
    }

    /**
     * Get the table schema as a map from column name to Hive column type
     *
     * @param datasetReference reference to the dataset (database and table name)
     * @return map from column name to column type
     */
    public Map<String, String> getTableSchema(HoodieDatasetReference datasetReference) {
        if (!checkTableExists(datasetReference)) {
            throw new IllegalArgumentException(
                    "Failed to get schema as table " + datasetReference.getDatabaseTableName() + " does not exist");
        }
        Map<String, String> schema = Maps.newHashMap();
        ResultSet result = null;
        try {
            Connection connection = getConnection();
            DatabaseMetaData databaseMetaData = connection.getMetaData();
            result = databaseMetaData.getColumns(null, datasetReference.getDatabaseName(),
                    datasetReference.getTableName(), null);
            while (result.next()) {
                String columnName = result.getString(4);
                String columnType = result.getString(6);
                schema.put(columnName, columnType);
            }
            return schema;
        } catch (SQLException e) {
            throw new HoodieHiveDatasetException("Failed to get table schema for " + datasetReference, e);
        } finally {
            closeQuietly(result, null);
        }
    }

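    /**
     * Register the given storage partitions with the Hive table, generating a single
     * ALTER TABLE ... ADD IF NOT EXISTS PARTITION ... statement
     *
     * @param datasetReference reference to the dataset (database and table name)
     * @param partitionsToAdd  partitions found on storage that are missing from the table
     * @param strategy         strategy supplying the Hive partition field names
     */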
    public void addPartitionsToTable(HoodieDatasetReference datasetReference,
            List<StoragePartition> partitionsToAdd, PartitionStrategy strategy) {
        if (partitionsToAdd.isEmpty()) {
            LOG.info("No partitions to add for " + datasetReference);
            return;
        }
        LOG.info("Adding partitions " + partitionsToAdd.size() + " to dataset " + datasetReference);
        String sql = constructAddPartitions(datasetReference, partitionsToAdd, strategy);
        updateHiveSQL(sql);
    }

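    /**
     * Point existing Hive partitions at their current storage location, generating one
     * ALTER TABLE ... PARTITION ... SET LOCATION statement per changed partition
     *
     * @param datasetReference  reference to the dataset (database and table name)
     * @param changedPartitions partitions whose storage location has changed
     * @param partitionStrategy strategy supplying the Hive partition field names
     */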
    public void updatePartitionsToTable(HoodieDatasetReference datasetReference,
            List<StoragePartition> changedPartitions, PartitionStrategy partitionStrategy) {
        if (changedPartitions.isEmpty()) {
            LOG.info("No partitions to change for " + datasetReference);
            return;
        }
        LOG.info("Changing partitions " + changedPartitions.size() + " on dataset " + datasetReference);
        List<String> sqls = constructChangePartitions(datasetReference, changedPartitions, partitionStrategy);
        for (String sql : sqls) {
            updateHiveSQL(sql);
        }
    }

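    /**
     * Create the Hive table from the storage schema using the DDL generated by
     * {@link SchemaUtil#generateCreateDDL}
     *
     * @param storageSchema     parquet schema of the dataset on storage
     * @param metadata          reference to the dataset (database and table name)
     * @param partitionKeys     Hive partition field names
     * @param inputFormatClass  input format to register on the table
     * @param outputFormatClass output format to register on the table
     */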
    public void createTable(MessageType storageSchema, HoodieDatasetReference metadata, String[] partitionKeys,
            String inputFormatClass, String outputFormatClass) {
        try {
            String createSQLQuery = SchemaUtil.generateCreateDDL(storageSchema, metadata, partitionKeys,
                    inputFormatClass, outputFormatClass);
            LOG.info("Creating table with " + createSQLQuery);
            updateHiveSQL(createSQLQuery);
        } catch (IOException e) {
            throw new HoodieHiveDatasetException("Failed to create table for " + metadata, e);
        }
    }

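    /**
     * Close the given statement and result set quietly, logging SQLExceptions instead of
     * propagating them
     */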
    private static void closeQuietly(ResultSet resultSet, Statement stmt) {
        // Close each resource independently so a failure on one does not skip the other
        try {
            if (stmt != null)
                stmt.close();
        } catch (SQLException e) {
            LOG.error("Could not close the statement opened ", e);
        }
        try {
            if (resultSet != null)
                resultSet.close();
        } catch (SQLException e) {
            LOG.error("Could not close the resultset opened ", e);
        }
    }

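    /**
     * Lazily open and cache the JDBC connection, retrying up to three times on SQLException
     */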
    private Connection getConnection() throws SQLException {
        int count = 0;
        int maxTries = 3;
        if (connection == null) {
            DataSource ds = getDatasource();
            LOG.info("Getting Hive Connection from Datasource " + ds);
            while (true) {
                try {
                    this.connection = ds.getConnection();
                    break;
                } catch (SQLException e) {
                    if (++count == maxTries)
                        throw e;
                }
            }
        }
        return connection;
    }

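    /**
     * Build a DBCP {@link BasicDataSource} for Hive using the configured JDBC url, username and password
     */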
    private DataSource getDatasource() {
        BasicDataSource ds = new BasicDataSource();
        ds.setDriverClassName(driverName);
        ds.setUrl(getHiveJdbcUrlWithDefaultDBName());
        ds.setUsername(configuration.getHiveUsername());
        ds.setPassword(configuration.getHivePassword());
        return ds;
    }

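    /**
     * Append the configured database name to the Hive JDBC url, preserving any trailing
     * connection properties. For example (illustrative values only), a configured url of
     * "jdbc:hive2://host:10000;transportMode=http;httpPath=hs2" with database "mydb" becomes
     * "jdbc:hive2://host:10000/mydb;transportMode=http;httpPath=hs2".
     */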
    public String getHiveJdbcUrlWithDefaultDBName() {
        String hiveJdbcUrl = configuration.getHiveJdbcUrl();
        String urlAppend = null;
        // If the hive url contains additional properties like ;transportMode=http;httpPath=hs2
        if (hiveJdbcUrl.contains(";")) {
            urlAppend = hiveJdbcUrl.substring(hiveJdbcUrl.indexOf(";"));
            hiveJdbcUrl = hiveJdbcUrl.substring(0, hiveJdbcUrl.indexOf(";"));
        }
        if (!hiveJdbcUrl.endsWith("/")) {
            hiveJdbcUrl = hiveJdbcUrl + "/";
        }
        return hiveJdbcUrl + configuration.getDbName() + (urlAppend == null ? "" : urlAppend);
    }

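    /**
     * Build one ALTER TABLE statement per partition whose location changed, roughly of the form
     * (illustrative values only):
     * ALTER TABLE db.table PARTITION (datestr='2016-03-01') SET LOCATION 'hdfs://nameservice1/path/to/partition'
     */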
    private static List<String> constructChangePartitions(HoodieDatasetReference metadata,
            List<StoragePartition> partitions, PartitionStrategy partitionStrategy) {
        String[] partitionFieldNames = partitionStrategy.getHivePartitionFieldNames();

        List<String> changePartitions = Lists.newArrayList();
        String alterTable = "ALTER TABLE " + metadata.getDatabaseTableName();
        for (StoragePartition partition : partitions) {
            StringBuilder partBuilder = new StringBuilder();
            String[] partitionValues = partition.getPartitionFieldValues();
            Preconditions.checkArgument(partitionFieldNames.length == partitionValues.length,
                    "Partition key parts " + Arrays.toString(partitionFieldNames)
                            + " does not match with partition values " + Arrays.toString(partitionValues)
                            + ". Check partition strategy. ");
            for (int i = 0; i < partitionFieldNames.length; i++) {
                if (i > 0) {
                    partBuilder.append(","); // comma-separate multiple partition fields
                }
                partBuilder.append(partitionFieldNames[i]).append("=").append("'").append(partitionValues[i])
                        .append("'");
            }
            // Note: the target namenode (hdfs://nameservice1) is hard-coded and prefixed to the partition path
            String changePartition = alterTable + " PARTITION (" + partBuilder.toString() + ") SET LOCATION '"
                    + "hdfs://nameservice1" + partition.getPartitionPath() + "'";
            changePartitions.add(changePartition);
        }
        return changePartitions;
    }

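    /**
     * Build a single ALTER TABLE ... ADD IF NOT EXISTS statement covering all new partitions,
     * roughly of the form (illustrative values only):
     * ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (datestr='2016-03-01') LOCATION '/path/to/partition'
     */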
    private static String constructAddPartitions(HoodieDatasetReference metadata, List<StoragePartition> partitions,
            PartitionStrategy partitionStrategy) {
        return constructAddPartitions(metadata.getDatabaseTableName(), partitions, partitionStrategy);
    }

    private static String constructAddPartitions(String newDbTableName, List<StoragePartition> partitions,
            PartitionStrategy partitionStrategy) {
        String[] partitionFieldNames = partitionStrategy.getHivePartitionFieldNames();
        StringBuilder alterSQL = new StringBuilder("ALTER TABLE ");
        alterSQL.append(newDbTableName).append(" ADD IF NOT EXISTS ");
        for (StoragePartition partition : partitions) {
            StringBuilder partBuilder = new StringBuilder();
            String[] partitionValues = partition.getPartitionFieldValues();
            Preconditions.checkArgument(partitionFieldNames.length == partitionValues.length,
                    "Partition key parts " + Arrays.toString(partitionFieldNames)
                            + " does not match with partition values " + Arrays.toString(partitionValues)
                            + ". Check partition strategy. ");
            for (int i = 0; i < partitionFieldNames.length; i++) {
                if (i > 0) {
                    partBuilder.append(","); // comma-separate multiple partition fields
                }
                partBuilder.append(partitionFieldNames[i]).append("=").append("'").append(partitionValues[i])
                        .append("'");
            }
            alterSQL.append("  PARTITION (").append(partBuilder.toString()).append(") LOCATION '")
                    .append(partition.getPartitionPath()).append("' ");
        }

        return alterSQL.toString();
    }

    @Override
    public void close() throws IOException {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                LOG.error("Could not close the connection opened ", e);
            }
        }
    }
}
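
Example usage

The sketch below shows one way the client above might be driven; it is illustrative only. It assumes a HoodieHiveConfiguration and a HoodieDatasetReference have already been constructed elsewhere (their construction is not part of this file), and the class and method names HoodieHiveClientExample and printTableInfo are made up for the example.

import com.uber.hoodie.hive.HoodieHiveConfiguration;
import com.uber.hoodie.hive.client.HoodieHiveClient;
import com.uber.hoodie.hive.model.HoodieDatasetReference;
import com.uber.hoodie.hive.model.TablePartition;

import java.io.IOException;
import java.util.List;
import java.util.Map;

public class HoodieHiveClientExample {

    // conf and ref are assumed to be built elsewhere; only the call pattern of
    // HoodieHiveClient itself is demonstrated here.
    public static void printTableInfo(HoodieHiveConfiguration conf, HoodieDatasetReference ref)
            throws IOException {
        HoodieHiveClient client = new HoodieHiveClient(conf);
        try {
            if (!client.checkTableExists(ref)) {
                System.out.println("Table does not exist yet");
                return;
            }
            // Column name -> Hive type, read back through the JDBC metadata API
            Map<String, String> schema = client.getTableSchema(ref);
            for (Map.Entry<String, String> column : schema.entrySet()) {
                System.out.println(column.getKey() + " : " + column.getValue());
            }
            // Partitions currently registered in the Hive metastore
            List<TablePartition> partitions = client.scanPartitions(ref);
            System.out.println(partitions.size() + " partitions registered in Hive");
        } finally {
            client.close(); // releases the cached JDBC connection
        }
    }
}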