org.apache.gobblin.metastore.MysqlStateStore.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.gobblin.metastore.MysqlStateStore.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.metastore;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.sql.Blob;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.dbcp.BasicDataSource;
import org.apache.hadoop.io.Text;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.typesafe.config.Config;

import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.metastore.metadata.StateStoreEntryManager;
import org.apache.gobblin.metastore.predicates.StateStorePredicate;
import org.apache.gobblin.metastore.predicates.StoreNamePredicate;
import org.apache.gobblin.password.PasswordManager;
import org.apache.gobblin.util.ConfigUtils;
import org.apache.gobblin.util.io.StreamUtils;

import javax.sql.DataSource;

/**
 * An implementation of {@link StateStore} backed by MySQL.
 *
 * <p>
 *
 *     This implementation stores serialized {@link State}s as a blob in the database in the Sequence file format.
 *     The database table row is keyed by the store name and table name.
 *     State keys are state IDs (see {@link State#getId()}), and values are objects of {@link State} or
 *     any of its extensions. Keys will be empty strings if state IDs are not set
 *     (i.e., {@link State#getId()} returns <em>null</em>). In this case, the
 *     {@link MysqlStateStore#get(String, String, String)} method may not work.
 * </p>
 *
 * @param <T> state object type
 **/
public class MysqlStateStore<T extends State> implements StateStore<T> {

    // Class of the state objects to be put into the store
    private final Class<T> stateClass;
    private final DataSource dataSource;
    private final boolean compressedValues;

    private static final String UPSERT_JOB_STATE_TEMPLATE = "INSERT INTO $TABLE$ (store_name, table_name, state) VALUES(?,?,?)"
            + " ON DUPLICATE KEY UPDATE state = values(state)";

    private static final String SELECT_JOB_STATE_TEMPLATE = "SELECT state FROM $TABLE$ WHERE store_name = ? and table_name = ?";

    private static final String SELECT_JOB_STATE_WITH_LIKE_TEMPLATE = "SELECT state FROM $TABLE$ WHERE store_name = ? and table_name like ?";

    private static final String SELECT_JOB_STATE_EXISTS_TEMPLATE = "SELECT 1 FROM $TABLE$ WHERE store_name = ? and table_name = ?";

    private static final String SELECT_JOB_STATE_NAMES_TEMPLATE = "SELECT table_name FROM $TABLE$ WHERE store_name = ?";

    private static final String SELECT_STORE_NAMES_TEMPLATE = "SELECT distinct store_name FROM $TABLE$";

    private static final String DELETE_JOB_STORE_TEMPLATE = "DELETE FROM $TABLE$ WHERE store_name = ?";

    private static final String DELETE_JOB_STATE_TEMPLATE = "DELETE FROM $TABLE$ WHERE store_name = ? AND table_name = ?";

    private static final String CLONE_JOB_STATE_TEMPLATE = "INSERT INTO $TABLE$(store_name, table_name, state)"
            + " (SELECT store_name, ?, state FROM $TABLE$ s WHERE" + " store_name = ? AND table_name = ?)"
            + " ON DUPLICATE KEY UPDATE state = s.state";

    private static final String SELECT_METADATA_TEMPLATE = "SELECT store_name, table_name, modified_time from $TABLE$ where store_name like ?";

    // MySQL key length limit is 767 bytes
    private static final String CREATE_JOB_STATE_TABLE_TEMPLATE = "CREATE TABLE IF NOT EXISTS $TABLE$ (store_name varchar(100) CHARACTER SET latin1 COLLATE latin1_bin not null,"
            + "table_name varchar(667) CHARACTER SET latin1 COLLATE latin1_bin not null,"
            + " modified_time timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"
            + " state longblob, primary key(store_name, table_name))";

    private final String UPSERT_JOB_STATE_SQL;
    private final String SELECT_JOB_STATE_SQL;
    private final String SELECT_JOB_STATE_WITH_LIKE_SQL;
    private final String SELECT_JOB_STATE_EXISTS_SQL;
    private final String SELECT_JOB_STATE_NAMES_SQL;
    private final String DELETE_JOB_STORE_SQL;
    private final String DELETE_JOB_STATE_SQL;
    private final String CLONE_JOB_STATE_SQL;
    private final String SELECT_STORE_NAMES_SQL;
    private final String SELECT_METADATA_SQL;

    /**
     * Manages the persistence and retrieval of {@link State} in a MySQL database
     * @param dataSource the {@link DataSource} object for connecting to MySQL
     * @param stateStoreTableName the table for storing the state in rows keyed by two levels (store_name, table_name)
     * @param compressedValues should values be compressed for storage?
     * @param stateClass class of the {@link State}s stored in this state store
     * @throws IOException
     */
    public MysqlStateStore(DataSource dataSource, String stateStoreTableName, boolean compressedValues,
            Class<T> stateClass) throws IOException {
        this.dataSource = dataSource;
        this.stateClass = stateClass;
        this.compressedValues = compressedValues;

        UPSERT_JOB_STATE_SQL = UPSERT_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        SELECT_JOB_STATE_SQL = SELECT_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        SELECT_JOB_STATE_WITH_LIKE_SQL = SELECT_JOB_STATE_WITH_LIKE_TEMPLATE.replace("$TABLE$",
                stateStoreTableName);
        SELECT_JOB_STATE_EXISTS_SQL = SELECT_JOB_STATE_EXISTS_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        SELECT_JOB_STATE_NAMES_SQL = SELECT_JOB_STATE_NAMES_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        DELETE_JOB_STORE_SQL = DELETE_JOB_STORE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        DELETE_JOB_STATE_SQL = DELETE_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        CLONE_JOB_STATE_SQL = CLONE_JOB_STATE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        SELECT_STORE_NAMES_SQL = SELECT_STORE_NAMES_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        SELECT_METADATA_SQL = SELECT_METADATA_TEMPLATE.replace("$TABLE$", stateStoreTableName);

        // create table if it does not exist
        String createJobTable = CREATE_JOB_STATE_TABLE_TEMPLATE.replace("$TABLE$", stateStoreTableName);
        try (Connection connection = dataSource.getConnection();
                PreparedStatement createStatement = connection.prepareStatement(createJobTable)) {
            createStatement.executeUpdate();
        } catch (SQLException e) {
            throw new IOException("Failure creation table " + stateStoreTableName, e);
        }
    }

    /**
     * creates a new {@link BasicDataSource}
     * @param config the properties used for datasource instantiation
     * @return
     */
    public static BasicDataSource newDataSource(Config config) {
        BasicDataSource basicDataSource = new BasicDataSource();
        PasswordManager passwordManager = PasswordManager.getInstance(ConfigUtils.configToProperties(config));

        basicDataSource
                .setDriverClassName(ConfigUtils.getString(config, ConfigurationKeys.STATE_STORE_DB_JDBC_DRIVER_KEY,
                        ConfigurationKeys.DEFAULT_STATE_STORE_DB_JDBC_DRIVER));
        // MySQL server can timeout a connection so need to validate connections before use
        basicDataSource.setValidationQuery("select 1");
        basicDataSource.setTestOnBorrow(true);
        basicDataSource.setDefaultAutoCommit(false);
        basicDataSource.setTimeBetweenEvictionRunsMillis(60000);
        basicDataSource.setUrl(config.getString(ConfigurationKeys.STATE_STORE_DB_URL_KEY));
        basicDataSource.setUsername(
                passwordManager.readPassword(config.getString(ConfigurationKeys.STATE_STORE_DB_USER_KEY)));
        basicDataSource.setPassword(
                passwordManager.readPassword(config.getString(ConfigurationKeys.STATE_STORE_DB_PASSWORD_KEY)));
        basicDataSource.setMinEvictableIdleTimeMillis(
                ConfigUtils.getLong(config, ConfigurationKeys.STATE_STORE_DB_CONN_MIN_EVICTABLE_IDLE_TIME_KEY,
                        ConfigurationKeys.DEFAULT_STATE_STORE_DB_CONN_MIN_EVICTABLE_IDLE_TIME));

        return basicDataSource;
    }

    /**
     * return an identifier for the data source based on the configuration
     * @param config configuration
     * @return a {@link String} to identify the data source
     */
    public static String getDataSourceId(Config config) {
        PasswordManager passwordManager = PasswordManager.getInstance(ConfigUtils.configToProperties(config));

        return ConfigUtils.getString(config, ConfigurationKeys.STATE_STORE_DB_JDBC_DRIVER_KEY,
                ConfigurationKeys.DEFAULT_STATE_STORE_DB_JDBC_DRIVER) + "::"
                + config.getString(ConfigurationKeys.STATE_STORE_DB_URL_KEY) + "::"
                + passwordManager.readPassword(config.getString(ConfigurationKeys.STATE_STORE_DB_USER_KEY));
    }

    @Override
    public boolean create(String storeName) throws IOException {
        /* nothing to do since state will be stored as a new row in a DB table that has been validated */
        return true;
    }

    @Override
    public boolean create(String storeName, String tableName) throws IOException {
        if (exists(storeName, tableName)) {
            throw new IOException(
                    String.format("State already exists for storeName %s tableName %s", storeName, tableName));
        }

        return true;
    }

    @Override
    public boolean exists(String storeName, String tableName) throws IOException {
        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection.prepareStatement(SELECT_JOB_STATE_EXISTS_SQL)) {
            int index = 0;
            queryStatement.setString(++index, storeName);
            queryStatement.setString(++index, tableName);

            try (ResultSet rs = queryStatement.executeQuery()) {
                if (rs.next()) {
                    return true;
                } else {
                    return false;
                }
            }
        } catch (SQLException e) {
            throw new IOException(
                    "Failure checking existence of storeName " + storeName + " tableName " + tableName, e);
        }
    }

    /**
     * Serializes the state to the {@link DataOutput}
     * @param dataOutput output target receiving the serialized data
     * @param state the state to serialize
     * @throws IOException
     */
    private void addStateToDataOutputStream(DataOutput dataOutput, T state) throws IOException {
        new Text(Strings.nullToEmpty(state.getId())).write(dataOutput);
        state.write(dataOutput);
    }

    @Override
    public void put(String storeName, String tableName, T state) throws IOException {
        putAll(storeName, tableName, Collections.singleton(state));
    }

    @Override
    public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
        try (Connection connection = dataSource.getConnection();
                PreparedStatement insertStatement = connection.prepareStatement(UPSERT_JOB_STATE_SQL);
                ByteArrayOutputStream byteArrayOs = new ByteArrayOutputStream();
                OutputStream os = compressedValues ? new GZIPOutputStream(byteArrayOs) : byteArrayOs;
                DataOutputStream dataOutput = new DataOutputStream(os)) {

            int index = 0;
            insertStatement.setString(++index, storeName);
            insertStatement.setString(++index, tableName);

            for (T state : states) {
                addStateToDataOutputStream(dataOutput, state);
            }

            dataOutput.close();
            insertStatement.setBlob(++index, new ByteArrayInputStream(byteArrayOs.toByteArray()));

            insertStatement.executeUpdate();
            connection.commit();
        } catch (SQLException e) {
            throw new IOException("Failure storing state to store " + storeName + " table " + tableName, e);
        }
    }

    @Override
    public T get(String storeName, String tableName, String stateId) throws IOException {
        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection.prepareStatement(SELECT_JOB_STATE_SQL)) {
            int index = 0;
            queryStatement.setString(++index, storeName);
            queryStatement.setString(++index, tableName);

            try (ResultSet rs = queryStatement.executeQuery()) {
                if (rs.next()) {
                    Blob blob = rs.getBlob(1);
                    Text key = new Text();

                    try (InputStream is = StreamUtils.isCompressed(blob.getBytes(1, 2))
                            ? new GZIPInputStream(blob.getBinaryStream())
                            : blob.getBinaryStream(); DataInputStream dis = new DataInputStream(is)) {
                        // keep deserializing while we have data
                        while (dis.available() > 0) {
                            T state = this.stateClass.newInstance();

                            key.readFields(dis);
                            state.readFields(dis);

                            if (key.toString().equals(stateId)) {
                                return state;
                            }
                        }
                    } catch (EOFException e) {
                        // no more data. GZIPInputStream.available() doesn't return 0 until after EOF.
                    }
                }
            }
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            throw new IOException(
                    "failure retrieving state from storeName " + storeName + " tableName " + tableName, e);
        }

        return null;
    }

    protected List<T> getAll(String storeName, String tableName, boolean useLike) throws IOException {
        List<T> states = Lists.newArrayList();

        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection
                        .prepareStatement(useLike ? SELECT_JOB_STATE_WITH_LIKE_SQL : SELECT_JOB_STATE_SQL)) {
            queryStatement.setString(1, storeName);
            queryStatement.setString(2, tableName);

            try (ResultSet rs = queryStatement.executeQuery()) {
                while (rs.next()) {
                    Blob blob = rs.getBlob(1);
                    Text key = new Text();

                    try (InputStream is = StreamUtils.isCompressed(blob.getBytes(1, 2))
                            ? new GZIPInputStream(blob.getBinaryStream())
                            : blob.getBinaryStream(); DataInputStream dis = new DataInputStream(is)) {
                        // keep deserializing while we have data
                        while (dis.available() > 0) {
                            T state = this.stateClass.newInstance();
                            key.readString(dis);
                            state.readFields(dis);
                            states.add(state);
                        }
                    } catch (EOFException e) {
                        // no more data. GZIPInputStream.available() doesn't return 0 until after EOF.
                    }
                }
            }
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            throw new IOException(
                    "failure retrieving state from storeName " + storeName + " tableName " + tableName, e);
        }

        return states;
    }

    @Override
    public List<T> getAll(String storeName, String tableName) throws IOException {
        return getAll(storeName, tableName, false);
    }

    @Override
    public List<T> getAll(String storeName) throws IOException {
        return getAll(storeName, "%", true);
    }

    @Override
    public List<String> getTableNames(String storeName, Predicate<String> predicate) throws IOException {
        List<String> names = Lists.newArrayList();

        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection.prepareStatement(SELECT_JOB_STATE_NAMES_SQL)) {
            queryStatement.setString(1, storeName);

            try (ResultSet rs = queryStatement.executeQuery()) {
                while (rs.next()) {
                    String name = rs.getString(1);
                    if (predicate.apply(name)) {
                        names.add(name);
                    }
                }
            }
        } catch (SQLException e) {
            throw new IOException(String.format("Could not query table names for store %s", storeName), e);
        }

        return names;
    }

    /**
     * Get store names in the state store
     *
     * @param predicate only returns names matching predicate
     * @return (possibly empty) list of store names from the given store
     * @throws IOException
     */
    public List<String> getStoreNames(Predicate<String> predicate) throws IOException {
        List<String> names = Lists.newArrayList();

        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection.prepareStatement(SELECT_STORE_NAMES_SQL)) {

            try (ResultSet rs = queryStatement.executeQuery()) {
                while (rs.next()) {
                    String name = rs.getString(1);
                    if (predicate.apply(name)) {
                        names.add(name);
                    }
                }
            }
        } catch (SQLException e) {
            throw new IOException(String.format("Could not query store names"), e);
        }

        return names;
    }

    @Override
    public void createAlias(String storeName, String original, String alias) throws IOException {

        if (!exists(storeName, original)) {
            throw new IOException(String.format("State does not exist for table %s", original));
        }

        try (Connection connection = dataSource.getConnection();
                PreparedStatement cloneStatement = connection.prepareStatement(CLONE_JOB_STATE_SQL)) {
            int index = 0;
            cloneStatement.setString(++index, alias);
            cloneStatement.setString(++index, storeName);
            cloneStatement.setString(++index, original);
            cloneStatement.executeUpdate();
            connection.commit();
        } catch (SQLException e) {
            throw new IOException(
                    String.format("Failure creating alias for store %s original %s", storeName, original), e);
        }
    }

    @Override
    public void delete(String storeName, String tableName) throws IOException {
        try (Connection connection = dataSource.getConnection();
                PreparedStatement deleteStatement = connection.prepareStatement(DELETE_JOB_STATE_SQL)) {
            int index = 0;
            deleteStatement.setString(++index, storeName);
            deleteStatement.setString(++index, tableName);
            deleteStatement.executeUpdate();
            connection.commit();
        } catch (SQLException e) {
            throw new IOException("failure deleting storeName " + storeName + " tableName " + tableName, e);
        }
    }

    @Override
    public void delete(String storeName) throws IOException {
        try (Connection connection = dataSource.getConnection();
                PreparedStatement deleteStatement = connection.prepareStatement(DELETE_JOB_STORE_SQL)) {
            deleteStatement.setString(1, storeName);
            deleteStatement.executeUpdate();
            connection.commit();
        } catch (SQLException e) {
            throw new IOException("failure deleting storeName " + storeName, e);
        }
    }

    /**
     * Gets entry managers for all tables matching the predicate
     * @param predicate Predicate used to filter tables. To allow state stores to push down predicates, use native extensions
     *                  of {@link StateStorePredicate}.
     * @throws IOException
     */
    @Override
    public List<? extends StateStoreEntryManager> getMetadataForTables(StateStorePredicate predicate)
            throws IOException {
        List<MysqlStateStoreEntryManager> entryManagers = Lists.newArrayList();

        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection.prepareStatement(SELECT_METADATA_SQL)) {
            String storeName = predicate instanceof StoreNamePredicate
                    ? ((StoreNamePredicate) predicate).getStoreName()
                    : "%";
            queryStatement.setString(1, storeName);

            try (ResultSet rs = queryStatement.executeQuery()) {
                while (rs.next()) {
                    String rsStoreName = rs.getString(1);
                    String rsTableName = rs.getString(2);
                    Timestamp timestamp = rs.getTimestamp(3);

                    StateStoreEntryManager entryManager = new MysqlStateStoreEntryManager(rsStoreName, rsTableName,
                            timestamp.getTime(), this);

                    if (predicate.apply(entryManager)) {
                        entryManagers.add(new MysqlStateStoreEntryManager(rsStoreName, rsTableName,
                                timestamp.getTime(), this));
                    }
                }
            }
        } catch (SQLException e) {
            throw new IOException("failure getting metadata for tables", e);
        }

        return entryManagers;
    }

    /**
     * For setting timestamps in tests
     * @param timestamp 0 to set to default, non-zero to set an epoch time
     * @throws SQLException
     */
    @VisibleForTesting
    public void setTestTimestamp(long timestamp) throws IOException {
        String statement = "SET TIMESTAMP =";

        // 0 is used to reset to the default
        if (timestamp > 0) {
            statement += timestamp;
        } else {
            statement += " DEFAULT";
        }

        try (Connection connection = dataSource.getConnection();
                PreparedStatement queryStatement = connection.prepareStatement(statement)) {
            queryStatement.execute();
        } catch (SQLException e) {
            throw new IOException("Could not set timestamp " + timestamp, e);
        }
    }
}