qa.qcri.nadeef.core.util.sql.SQLDialectBase.java Source code

Java tutorial

Introduction

Here is the source code for qa.qcri.nadeef.core.util.sql.SQLDialectBase.java

Source

/*
 * QCRI, NADEEF LICENSE
 * NADEEF is an extensible, generalized and easy-to-deploy data cleaning platform built at QCRI.
 * NADEEF means "Clean" in Arabic
 *
 * Copyright (c) 2011-2013, Qatar Foundation for Education, Science and Community Development (on
 * behalf of Qatar Computing Research Institute) having its principle place of business in Doha,
 * Qatar with the registered address P.O box 5825 Doha, Qatar (hereinafter referred to as "QCRI")
 *
 * NADEEF has patent pending nevertheless the following is granted.
 * NADEEF is released under the terms of the MIT License, (http://opensource.org/licenses/MIT).
 */

package qa.qcri.nadeef.core.util.sql;

import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.base.Strings;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroupFile;
import qa.qcri.nadeef.tools.CommonTools;
import qa.qcri.nadeef.tools.DBConfig;
import qa.qcri.nadeef.tools.Tracer;
import qa.qcri.nadeef.tools.sql.SQLDialect;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.nio.file.Path;
import java.sql.*;
import java.util.concurrent.TimeUnit;

/**
 * Interface for cross vendor Database methods.
 */
public abstract class SQLDialectBase {
    /**
     * Creates SQLDialect instance.
     * @param dialect dialect.
     * @return SQLDialectBase instance.
     */
    public static SQLDialectBase createDialectBaseInstance(SQLDialect dialect) {
        SQLDialectBase dialectInstance;
        switch (dialect) {
        default:
        case DERBYMEMORY:
        case DERBY:
            dialectInstance = new DerbySQLDialect();
            break;
        case POSTGRES:
            dialectInstance = new PostgresSQLDialect();
            break;
        case MYSQL:
            dialectInstance = new MySQLDialect();
            break;
        }
        return dialectInstance;
    }

    /**
     * Returns True when bulk loading is supported.
     * @return True when bulk loading is supported.
     */
    public boolean supportBulkLoad() {
        return false;
    }

    /**
     * Bulk load CSV file.
     * @param dbConfig DBConfig.
     * @param tableName table name.
     * @param file CSV file.
     * @param skipHeader has header.
     * @return line of rows loaded.
     */
    public int bulkLoad(DBConfig dbConfig, String tableName, Path file, boolean skipHeader) {
        throw new UnsupportedOperationException("Method is not implemented.");
    }

    /**
     * Loads CSV file when bulk load is not used.
     * @param dbConfig {@link qa.qcri.nadeef.tools.DBConfig}
     * @param tableName table name.
     * @param file CSV file.
     * @param skipHeader skip header.
     * @return line of rows loaded.
     */
    public int fallbackLoad(DBConfig dbConfig, String tableName, File file, boolean skipHeader) {
        Tracer tracer = Tracer.getTracer(SQLDialectBase.class);
        Stopwatch stopwatch = Stopwatch.createStarted();

        try (Connection conn = DBConnectionPool.createConnection(dbConfig, false);
                Statement stat = conn.createStatement();
                BufferedReader reader = new BufferedReader(new FileReader(file))) {
            ResultSet rs = stat.executeQuery(this.selectAll(tableName));
            ResultSetMetaData metaData = rs.getMetaData();
            String line;
            int lineCount = 0;
            int size = 0;
            // Batch load the data
            while ((line = reader.readLine()) != null) {
                lineCount++;
                if (Strings.isNullOrEmpty(line))
                    continue;
                if (skipHeader && lineCount == 1)
                    continue;
                size += line.toCharArray().length;
                String[] tokens = line.split(",");
                String[] newTokens = new String[tokens.length];
                for (int i = 0; i < tokens.length; i++) {
                    newTokens[i] = CommonTools.unescapeString(tokens[i], CommonTools.DOUBLE_QUOTE);
                }

                String sql = this.importFromCSV(metaData, tableName, newTokens);
                stat.addBatch(sql);

                if (lineCount % 10240 == 0) {
                    stat.executeBatch();
                }
            }

            stat.executeBatch();
            conn.commit();

            tracer.info("Dumped " + size + " bytes in " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms.");
            stopwatch.stop();
            return lineCount;
        } catch (Exception ex) {
            tracer.err("Cannot load file " + file.getName(), ex);
        }
        return 0;
    }

    /**
     * Gets the template file.
     * @return template group file.
     */
    protected abstract STGroupFile getTemplate();

    /**
     * Install violation tables.
     * @param violationTableName violation table name.
     * @return SQL statement.
     */
    public String createViolationTable(String violationTableName) {
        STGroupFile template = Preconditions.checkNotNull(getTemplate());
        ST st = template.getInstanceOf("InstallViolationTable");
        st.add("violationTableName", violationTableName.toUpperCase());
        return st.render();
    }

    /**
     * Install repair tables.
     * @param repairTableName repair table name.
     * @return SQL statement.
     */
    public String createRepairTable(String repairTableName) {
        STGroupFile template = Preconditions.checkNotNull(getTemplate());
        ST st = template.getInstanceOf("InstallRepairTable");
        st.add("repairTableName", repairTableName.toUpperCase());
        return st.render();
    }

    /**
     * Install auditing tables.
     * @param auditTableName audit table name.
     * @return SQL statement.
     */
    public String createAuditTable(String auditTableName) {
        STGroupFile template = Preconditions.checkNotNull(getTemplate());
        ST st = template.getInstanceOf("InstallAuditTable");
        st.add("auditTableName", auditTableName.toUpperCase());
        return st.render();
    }

    /**
     * Next Vid.
     * @param tableName violation table name.
     * @return SQL statement.
     */
    public String nextVid(String tableName) {
        STGroupFile template = Preconditions.checkNotNull(getTemplate());
        ST st = template.getInstanceOf("NextVid");
        st.add("tableName", tableName.toUpperCase());
        return st.render();
    }

    /**
     * Creates a table in the database from a CSV file header.
     * @param tableName table name.
     * @param content table description.
     * @return SQL statement.
     */
    public String createTableFromCSV(String tableName, String content) {
        STGroupFile template = Preconditions.checkNotNull(getTemplate());
        ST st = template.getInstanceOf("CreateTableFromCSV");
        st.add("tableName", tableName.toUpperCase());
        st.add("content", content);
        return st.render();
    }

    /**
     * Copy table.
     * @param conn connection.
     * @param sourceName source name.
     * @param targetName target name.
     */
    public abstract void copyTable(Connection conn, String sourceName, String targetName) throws SQLException;

    /**
     * Drop table.
     * @param tableName drop table name.
     * @return SQL statement.
     */
    public String dropTable(String tableName) {
        return "DROP TABLE " + tableName;
    }

    /**
     * Drop index.
     * @param indexName index name.
     * @param tableName drop table name.
     * @return SQL statement.
     */
    public String dropIndex(String indexName, String tableName) {
        return "DROP INDEX " + indexName;
    }

    /**
     * Select star..
     * @param tableName table name.
     * @return SQL statement.
     */
    public String selectAll(String tableName) {
        return "SELECT * FROM " + tableName;
    }

    public String selectMaxTid(String tableName) {
        return "SELECT MAX(TID) FROM " + tableName;
    }

    public String deleteAll(String tableName) {
        return "DELETE FROM " + tableName;
    }

    /**
     * Count the number of rows in the table.
     * @param tableName table name.
     * @return SQL statement.
     */
    public abstract String countTable(String tableName);

    /**
     * Limits the select.
     * @param row row number.
     * @return SQL statement.
     */
    public abstract String limitRow(int row);

    /**
     * Inserts values into a table from CSV row used for batch loading.
     * @param metaData column meta data.
     * @param tableName target table name.
     * @param tokens row values.
     * @return SQL statement.
     */
    private String importFromCSV(ResultSetMetaData metaData, String tableName, String[] tokens) {
        StringBuilder valueBuilder = new StringBuilder(1024);
        StringBuilder columnBuilder = new StringBuilder(1024);
        try {
            int delta = tokens.length == metaData.getColumnCount() ? 0 : 1;
            for (int i = 0; i < tokens.length; i++) {
                String columnName = metaData.getColumnName(i + 1 + delta);
                String typeName = metaData.getColumnTypeName(i + 1 + delta);
                columnBuilder.append(columnName);

                if (typeName.equalsIgnoreCase("VARCHAR") || typeName.equalsIgnoreCase("CHAR")) {
                    valueBuilder.append(CommonTools.escapeString(tokens[i], CommonTools.SINGLE_QUOTE));
                } else {
                    valueBuilder.append(tokens[i]);
                }

                if (i != tokens.length - 1) {
                    valueBuilder.append(',');
                    columnBuilder.append(',');
                }
            }
        } catch (SQLException ex) {
            // type info is missing
            return "Missing SQL types when inserting";
        }

        ST st = getTemplate().getInstanceOf("InsertTableFromCSV");
        st.add("tableName", tableName);
        st.add("columns", columnBuilder.toString());
        st.add("values", valueBuilder.toString());
        return st.render();
    }
}