org.apache.falcon.hive.util.EventUtils.java Source code

Introduction

Here is the source code for org.apache.falcon.hive.util.EventUtils.java, a utility class used by Apache Falcon's hive-mirroring (Hive DR) replication to replay Hive replication events over HiveServer2 JDBC, copy staged data between clusters with DistCp, and record per-table replication status.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.hive.util;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.hive.HiveDRArgs;
import org.apache.falcon.hive.exception.HiveReplicationException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hive.hcatalog.api.repl.Command;
import org.apache.hive.hcatalog.api.repl.ReplicationUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Utility class to handle Hive events for hive-mirroring.
 */
public class EventUtils {
    private static final String DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    private static final int TIMEOUT_IN_SECS = 300;
    private static final String JDBC_PREFIX = "jdbc:";
    private static final int RETRY_ATTEMPTS = 3;

    private Configuration conf = null;
    private String sourceHiveServer2Uri = null;
    private String sourceHS2UriExtraOptions = null;
    private String sourceDatabase = null;
    private String sourceNN = null;
    private String sourceNNKerberosPrincipal = null;
    private String jobNN = null;
    private String jobNNKerberosPrincipal = null;
    private String targetHiveServer2Uri = null;
    private String targetHS2UriExtraOptions = null;
    private String sourceStagingPath = null;
    private String targetStagingPath = null;
    private String targetNN = null;
    private String targetNNKerberosPrincipal = null;
    private String sourceStagingUri = null;
    private String targetStagingUri = null;
    private List<Path> sourceCleanUpList = null;
    private List<Path> targetCleanUpList = null;
    private static final Logger LOG = LoggerFactory.getLogger(EventUtils.class);

    private FileSystem sourceFileSystem = null;
    private FileSystem jobFileSystem = null;
    private FileSystem targetFileSystem = null;
    private Connection sourceConnection = null;
    private Connection targetConnection = null;
    private Statement sourceStatement = null;
    private Statement targetStatement = null;

    private Map<String, Long> countersMap = null;

    private List<ReplicationStatus> listReplicationStatus;

    public EventUtils(Configuration conf) {
        this.conf = conf;
        sourceHiveServer2Uri = conf.get(HiveDRArgs.SOURCE_HS2_URI.getName());
        sourceHS2UriExtraOptions = conf.get(HiveDRArgs.SOURCE_HS2_URI_EXTRA_OPTS.getName());
        sourceDatabase = conf.get(HiveDRArgs.SOURCE_DATABASE.getName());
        sourceNN = conf.get(HiveDRArgs.SOURCE_NN.getName());
        sourceNNKerberosPrincipal = conf.get(HiveDRArgs.SOURCE_NN_KERBEROS_PRINCIPAL.getName());
        sourceStagingPath = conf.get(HiveDRArgs.SOURCE_STAGING_PATH.getName());
        jobNN = conf.get(HiveDRArgs.JOB_CLUSTER_NN.getName());
        jobNNKerberosPrincipal = conf.get(HiveDRArgs.JOB_CLUSTER_NN_KERBEROS_PRINCIPAL.getName());
        targetHiveServer2Uri = conf.get(HiveDRArgs.TARGET_HS2_URI.getName());
        targetHS2UriExtraOptions = conf.get(HiveDRArgs.TARGET_HS2_URI_EXTRA_OPTS.getName());
        targetStagingPath = conf.get(HiveDRArgs.TARGET_STAGING_PATH.getName());
        targetNN = conf.get(HiveDRArgs.TARGET_NN.getName());
        targetNNKerberosPrincipal = conf.get(HiveDRArgs.TARGET_NN_KERBEROS_PRINCIPAL.getName());
        sourceCleanUpList = new ArrayList<>();
        targetCleanUpList = new ArrayList<>();
        countersMap = new HashMap<>();
    }

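    /**
     * Opens a HiveServer2 JDBC connection for the current execution stage: the source
     * connection for EXPORT, otherwise the target connection. The connection is made as the
     * current user's short name with an empty password, and ";auth=delegationToken" is
     * appended to the URL when a HiveServer2 Kerberos principal is configured for that side.
     */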
    public void setupConnection() throws Exception {
        Class.forName(DRIVER_NAME);
        DriverManager.setLoginTimeout(TIMEOUT_IN_SECS);
        String authTokenString = ";auth=delegationToken";
        //To bypass findbugs check, need to store empty password in Properties.
        Properties password = new Properties();
        password.put("password", "");
        String user = "";

        UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
        if (currentUser != null) {
            user = currentUser.getShortUserName();
        }

        if (conf.get(HiveDRArgs.EXECUTION_STAGE.getName())
                .equalsIgnoreCase(HiveDRUtils.ExecutionStage.EXPORT.name())) {
            String authString = null;
            if (StringUtils.isNotEmpty(conf.get(HiveDRArgs.SOURCE_HIVE2_KERBEROS_PRINCIPAL.getName()))) {
                authString = authTokenString;
            }

            String connString = getSourceHS2ConnectionUrl(authString);
            sourceConnection = DriverManager.getConnection(connString, user, password.getProperty("password"));
            sourceStatement = sourceConnection.createStatement();
        } else {
            String authString = null;
            if (StringUtils.isNotEmpty(conf.get(HiveDRArgs.TARGET_HIVE2_KERBEROS_PRINCIPAL.getName()))) {
                authString = authTokenString;
            }
            String connString = getTargetHS2ConnectionUrl(authString);
            targetConnection = DriverManager.getConnection(connString, user, password.getProperty("password"));
            targetStatement = targetConnection.createStatement();
        }
    }

    private String getSourceHS2ConnectionUrl(final String authTokenString) {
        return getHS2ConnectionUrl(sourceHiveServer2Uri, sourceDatabase, authTokenString, sourceHS2UriExtraOptions);
    }

    private String getTargetHS2ConnectionUrl(final String authTokenString) {
        return getHS2ConnectionUrl(targetHiveServer2Uri, sourceDatabase, authTokenString, targetHS2UriExtraOptions);
    }

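    /**
     * Builds the HiveServer2 JDBC URL: "jdbc:" + hs2Uri (trailing slash removed) + "/" + database,
     * followed by the optional auth token string and any extra URI options (skipped when blank
     * or "NA"), with a leading ";" added to the extra options if missing.
     */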
    public static String getHS2ConnectionUrl(final String hs2Uri, final String database,
            final String authTokenString, final String hs2UriExtraOpts) {
        StringBuilder connString = new StringBuilder();
        connString.append(JDBC_PREFIX).append(StringUtils.removeEnd(hs2Uri, "/")).append("/").append(database);

        if (StringUtils.isNotBlank(authTokenString)) {
            connString.append(authTokenString);
        }

        if (StringUtils.isNotBlank(hs2UriExtraOpts) && !("NA".equalsIgnoreCase(hs2UriExtraOpts))) {
            if (!hs2UriExtraOpts.startsWith(";")) {
                connString.append(";");
            }
            connString.append(hs2UriExtraOpts);
        }

        LOG.info("getHS2ConnectionUrl connection uri: {}", connString);
        return connString.toString();
    }

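    /**
     * Resolves the fully qualified source and target staging URIs and obtains FileSystem
     * handles for the source, job and target clusters using their NameNode URIs and
     * Kerberos principals.
     */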
    public void initializeFS() throws IOException {
        LOG.info("Initializing staging directory");
        sourceStagingUri = new Path(sourceNN, sourceStagingPath).toString();
        targetStagingUri = new Path(targetNN, targetStagingPath).toString();
        sourceFileSystem = FileSystem.get(FileUtils.getConfiguration(conf, sourceNN, sourceNNKerberosPrincipal));
        jobFileSystem = FileSystem.get(FileUtils.getConfiguration(conf, jobNN, jobNNKerberosPrincipal));
        targetFileSystem = FileSystem.get(FileUtils.getConfiguration(conf, targetNN, targetNNKerberosPrincipal));
    }

    private String readEvents(Path eventFileName) throws IOException {
        StringBuilder eventString = new StringBuilder();
        BufferedReader in = new BufferedReader(new InputStreamReader(jobFileSystem.open(eventFileName)));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                eventString.append(line);
                eventString.append(DelimiterUtils.NEWLINE_DELIM);
            }
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            IOUtils.closeQuietly(in);
        }

        return eventString.toString();
    }

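    /**
     * Processes a single delimited event record. The record carries the Base64-encoded database
     * and table names followed by the paths of the export and import statement files. During the
     * EXPORT stage the export statements are replayed against the source (followed by a DistCp of
     * the staging data when cleanup paths were registered); during the IMPORT stage the import
     * statements are replayed against the target.
     */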
    public void processEvents(String event) throws Exception {
        listReplicationStatus = new ArrayList<>();
        String[] eventSplit = event.split(DelimiterUtils.FIELD_DELIM);
        String dbName = new String(Base64.decodeBase64(eventSplit[0]), "UTF-8");
        String tableName = new String(Base64.decodeBase64(eventSplit[1]), "UTF-8");
        String exportEventStr;
        String importEventStr;
        if (conf.get(HiveDRArgs.EXECUTION_STAGE.getName())
                .equalsIgnoreCase(HiveDRUtils.ExecutionStage.EXPORT.name())) {
            exportEventStr = readEvents(new Path(eventSplit[2]));
            if (StringUtils.isNotEmpty(exportEventStr)) {
                LOG.info("Process the export statements for db {} table {}", dbName, tableName);
                processCommands(exportEventStr, dbName, tableName, sourceStatement, sourceCleanUpList, false);
                if (!sourceCleanUpList.isEmpty()) {
                    invokeCopy();
                }
            }
        } else if (conf.get(HiveDRArgs.EXECUTION_STAGE.getName())
                .equalsIgnoreCase(HiveDRUtils.ExecutionStage.IMPORT.name())) {
            importEventStr = readEvents(new Path(eventSplit[3]));
            if (StringUtils.isNotEmpty(importEventStr)) {
                LOG.info("Process the import statements for db {} table {}", dbName, tableName);
                processCommands(importEventStr, dbName, tableName, targetStatement, targetCleanUpList, true);
            }
        }
    }

    public List<ReplicationStatus> getListReplicationStatus() {
        return listReplicationStatus;
    }

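    /**
     * Deserializes the newline-delimited replication commands, registers their post-event cleanup
     * locations, and executes each command. If a command fails, the locations registered for both
     * staging areas are cleaned up before a HiveReplicationException is thrown.
     */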
    private void processCommands(String eventStr, String dbName, String tableName, Statement sqlStmt,
            List<Path> cleanUpList, boolean isImportStatements)
            throws SQLException, HiveReplicationException, IOException {
        String[] commandList = eventStr.split(DelimiterUtils.NEWLINE_DELIM);
        List<Command> deserializeCommand = new ArrayList<>();
        for (String command : commandList) {
            Command cmd = ReplicationUtils.deserializeCommand(command);
            deserializeCommand.add(cmd);
            List<String> cleanupLocations = cmd.cleanupLocationsAfterEvent();
            cleanUpList.addAll(getCleanUpPaths(cleanupLocations));
        }
        for (Command cmd : deserializeCommand) {
            try {
                LOG.debug("Executing command : {} : {} ", cmd.getEventId(), cmd.toString());
                executeCommand(cmd, dbName, tableName, sqlStmt, isImportStatements, 0);
            } catch (Exception e) {
                // clean up locations before failing.
                cleanupEventLocations(sourceCleanUpList, sourceFileSystem);
                cleanupEventLocations(targetCleanUpList, targetFileSystem);
                throw new HiveReplicationException("Could not process replication command for DB Name: "
                        + dbName + ", Table Name: " + tableName, e);
            }
        }
    }

    private void executeCommand(Command cmd, String dbName, String tableName, Statement sqlStmt,
            boolean isImportStatements, int attempt) throws HiveReplicationException, SQLException, IOException {
        for (final String stmt : cmd.get()) {
            executeSqlStatement(cmd, dbName, tableName, sqlStmt, stmt, isImportStatements, attempt);
        }
        if (isImportStatements) {
            addReplicationStatus(ReplicationStatus.Status.SUCCESS, dbName, tableName, cmd.getEventId());
        }
    }

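    /**
     * Executes a single statement of a replication command. On SQLException, retriable commands
     * are retried up to RETRY_ATTEMPTS times after cleaning their per-retry locations; once
     * retries are exhausted, the command is undone where possible, a FAILURE status is recorded
     * for import statements, and the exception is rethrown.
     */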
    private void executeSqlStatement(Command cmd, String dbName, String tableName, Statement sqlStmt, String stmt,
            boolean isImportStatements, int attempt) throws HiveReplicationException, SQLException, IOException {
        try {
            sqlStmt.execute(stmt);
        } catch (SQLException sqeOuter) {
            // Retry if command is retriable.
            if (attempt < RETRY_ATTEMPTS && cmd.isRetriable()) {
                if (isImportStatements) {
                    try {
                        cleanupEventLocations(getCleanUpPaths(cmd.cleanupLocationsPerRetry()), targetFileSystem);
                    } catch (IOException ioe) {
                        // Clean up failed before retry on target. Update failure status and return
                        addReplicationStatus(ReplicationStatus.Status.FAILURE, dbName, tableName, cmd.getEventId());
                        throw ioe;
                    }
                } else {
                    cleanupEventLocations(getCleanUpPaths(cmd.cleanupLocationsPerRetry()), sourceFileSystem);
                }
                executeCommand(cmd, dbName, tableName, sqlStmt, isImportStatements, ++attempt);
                return; // Retry succeeded, return without throwing an exception.
            }
            // If we reached here, retries have failed.
            LOG.error("SQL Exception", sqeOuter);
            undoCommand(cmd, dbName, tableName, sqlStmt, isImportStatements);
            if (isImportStatements) {
                addReplicationStatus(ReplicationStatus.Status.FAILURE, dbName, tableName, cmd.getEventId());
            }
            throw sqeOuter;
        }
    }

    private static List<Path> getCleanUpPaths(List<String> cleanupLocations) {
        List<Path> cleanupLocationPaths = new ArrayList<>();
        for (String cleanupLocation : cleanupLocations) {
            cleanupLocationPaths.add(new Path(cleanupLocation));
        }
        return cleanupLocationPaths;
    }

    private void undoCommand(Command cmd, String dbName, String tableName, Statement sqlStmt,
            boolean isImportStatements) throws SQLException, HiveReplicationException {
        if (cmd.isUndoable()) {
            try {
                List<String> undoCommands = cmd.getUndo();
                LOG.debug("Undo command: {}", StringUtils.join(undoCommands.toArray()));
                if (undoCommands.size() != 0) {
                    for (final String undoStmt : undoCommands) {
                        sqlStmt.execute(undoStmt);
                    }
                }
            } catch (SQLException sqeInner) {
                if (isImportStatements) {
                    addReplicationStatus(ReplicationStatus.Status.FAILURE, dbName, tableName, cmd.getEventId());
                }
                LOG.error("SQL Exception", sqeInner);
                throw sqeInner;
            }
        }
    }

    private void addReplicationStatus(ReplicationStatus.Status status, String dbName, String tableName,
            long eventId) throws HiveReplicationException {
        try {
            String drJobName = conf.get(HiveDRArgs.JOB_NAME.getName());
            ReplicationStatus rs = new ReplicationStatus(conf.get(HiveDRArgs.SOURCE_CLUSTER.getName()),
                    conf.get(HiveDRArgs.TARGET_CLUSTER.getName()), drJobName, dbName, tableName, status, eventId);
            listReplicationStatus.add(rs);
        } catch (HiveReplicationException hre) {
            throw new HiveReplicationException("Could not update replication status store for EventId: "
                    + eventId + ", DB Name: " + dbName + ", Table Name: " + tableName, hre);
        }
    }

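    /**
     * Runs DistCp from the source staging URI to the target staging URI and, on success,
     * captures the replication counters from the Hadoop job.
     */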
    public void invokeCopy() throws Exception {
        DistCpOptions options = getDistCpOptions();
        DistCp distCp = new DistCp(conf, options);
        LOG.info("Started DistCp with source Path: {} \ttarget path: {}", sourceStagingUri, targetStagingUri);

        Job distcpJob = distCp.execute();
        LOG.info("DistCp Hadoop job: {}", distcpJob.getJobID().toString());
        LOG.info("Completed DistCp");
        if (distcpJob.getStatus().getState() == JobStatus.State.SUCCEEDED) {
            countersMap = HiveDRUtils.fetchReplicationCounters(conf, distcpJob);
        }
    }

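    /**
     * Builds the DistCp options: a blocking, sync-folder copy from the source staging URI to the
     * target staging URI with the configured maximum maps and per-map bandwidth, and CRC checks
     * skipped when TDE encryption is enabled.
     */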
    public DistCpOptions getDistCpOptions() {
        // DistCpOptions expects the first argument to be a file OR a list of Paths
        List<Path> sourceUris = new ArrayList<>();
        sourceUris.add(new Path(sourceStagingUri));
        DistCpOptions distcpOptions = new DistCpOptions(sourceUris, new Path(targetStagingUri));

        // setSyncFolder(true) ensures directory structure is maintained when source is copied to target
        distcpOptions.setSyncFolder(true);
        // skipCRCCheck if TDE is enabled.
        if (Boolean.parseBoolean(conf.get(HiveDRArgs.TDE_ENCRYPTION_ENABLED.getName()))) {
            distcpOptions.setSkipCRC(true);
        }
        distcpOptions.setBlocking(true);
        distcpOptions.setMaxMaps(Integer.parseInt(conf.get(HiveDRArgs.DISTCP_MAX_MAPS.getName())));
        distcpOptions.setMapBandwidth(Integer.parseInt(conf.get(HiveDRArgs.DISTCP_MAP_BANDWIDTH.getName())));
        return distcpOptions;
    }

    public Long getCounterValue(String counterKey) {
        return countersMap.get(counterKey);
    }

    public boolean isCountersMapEmpty() {
        return countersMap.size() == 0;
    }

    public void cleanEventsDirectory() throws IOException {
        LOG.info("Cleaning staging directory");
        cleanupEventLocations(sourceCleanUpList, sourceFileSystem);
        cleanupEventLocations(targetCleanUpList, targetFileSystem);
    }

    private void cleanupEventLocations(List<Path> cleanupList, FileSystem fileSystem) throws IOException {
        for (Path cleanUpPath : cleanupList) {
            try {
                fileSystem.delete(cleanUpPath, true);
            } catch (IOException ioe) {
                LOG.error("Cleanup of staging directory {} failed", cleanUpPath, ioe);
                throw ioe;
            }
        }
    }

    public void closeConnection() throws SQLException {
        if (sourceStatement != null) {
            sourceStatement.close();
        }

        if (targetStatement != null) {
            targetStatement.close();
        }

        if (sourceConnection != null) {
            sourceConnection.close();
        }
        if (targetConnection != null) {
            targetConnection.close();
        }
    }
}
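
Example usage

The snippet below is a minimal, hypothetical driver (EventUtilsExample is not part of Falcon) showing how EventUtils could be wired into an EXPORT-stage run. It assumes a Configuration populated with the HiveDRArgs properties read by the constructor and an event record in the delimited form consumed by processEvents(); the host names, paths and values are placeholders.

import org.apache.falcon.hive.HiveDRArgs;
import org.apache.falcon.hive.util.EventUtils;
import org.apache.hadoop.conf.Configuration;

public class EventUtilsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Keys mirror the HiveDRArgs properties read in the EventUtils constructor;
        // all values are deployment-specific placeholders.
        conf.set(HiveDRArgs.EXECUTION_STAGE.getName(), "EXPORT");
        conf.set(HiveDRArgs.SOURCE_HS2_URI.getName(), "hive2://source-hs2.example.com:10000");
        conf.set(HiveDRArgs.SOURCE_DATABASE.getName(), "salesdb");
        conf.set(HiveDRArgs.SOURCE_NN.getName(), "hdfs://source-nn.example.com:8020");
        conf.set(HiveDRArgs.SOURCE_STAGING_PATH.getName(), "/apps/falcon/staging/source");
        conf.set(HiveDRArgs.JOB_CLUSTER_NN.getName(), "hdfs://source-nn.example.com:8020");
        conf.set(HiveDRArgs.TARGET_NN.getName(), "hdfs://target-nn.example.com:8020");
        conf.set(HiveDRArgs.TARGET_STAGING_PATH.getName(), "/apps/falcon/staging/target");
        conf.set(HiveDRArgs.DISTCP_MAX_MAPS.getName(), "1");
        conf.set(HiveDRArgs.DISTCP_MAP_BANDWIDTH.getName(), "100");

        EventUtils eventUtils = new EventUtils(conf);
        eventUtils.setupConnection();  // opens the source HS2 connection for the EXPORT stage
        eventUtils.initializeFS();     // resolves staging URIs and FileSystem handles
        try {
            // A delimited record produced upstream: Base64(dbName), Base64(tableName),
            // export statement file path, import statement file path.
            String eventRecord = args[0];
            eventUtils.processEvents(eventRecord);
        } finally {
            eventUtils.cleanEventsDirectory();  // deletes staging paths registered while processing
            eventUtils.closeConnection();
        }
    }
}

In Falcon itself these calls are issued by the hive-mirroring job rather than a standalone main method; the sketch only illustrates the call order implied by the class.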