com.alibaba.otter.node.etl.extract.extractor.DatabaseExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.alibaba.otter.node.etl.extract.extractor.DatabaseExtractor.java

Source

/*
 * Copyright (C) 2010-2101 Alibaba Group Holding Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.otter.node.etl.extract.extractor;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.SystemUtils;
import org.apache.ddlutils.model.Column;
import org.apache.ddlutils.model.Table;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;

import com.alibaba.otter.node.etl.OtterConstants;
import com.alibaba.otter.node.etl.common.db.dialect.DbDialect;
import com.alibaba.otter.node.etl.common.db.dialect.oracle.OracleDialect;
import com.alibaba.otter.node.etl.common.db.utils.SqlUtils;
import com.alibaba.otter.node.etl.extract.exceptions.ExtractException;
import com.alibaba.otter.shared.common.model.config.ConfigHelper;
import com.alibaba.otter.shared.common.model.config.data.ColumnPair;
import com.alibaba.otter.shared.common.model.config.data.DataMedia;
import com.alibaba.otter.shared.common.model.config.data.DataMediaPair;
import com.alibaba.otter.shared.common.model.config.data.db.DbMediaSource;
import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline;
import com.alibaba.otter.shared.common.utils.thread.NamedThreadFactory;
import com.alibaba.otter.shared.etl.model.DbBatch;
import com.alibaba.otter.shared.etl.model.EventColumn;
import com.alibaba.otter.shared.etl.model.EventColumnIndexComparable;
import com.alibaba.otter.shared.etl.model.EventData;

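/**
 * Extractor that queries the source database again for the current values of row events, using a
 * thread pool of {@linkplain DatabaseExtractWorker} tasks to speed up processing.
 * 
 * <pre>
 * Notes:
 *  1. Timeout = ( number of records / (poolsize + 1) ) * per-record timeout
 *     (reconstructed from an encoding-damaged comment; no timeout is applied in this class - TODO confirm)
 *  2. Lookups are processed in parallel (tentative)
 *  3. {@linkplain DatabaseExtractWorker} checks Thread.currentThread().isInterrupted(); the dbcp driver
 *     is expected to respond to interruption as well (TODO confirm)
 *  4. Every column read back from the database is marked update=true, even when its value did not
 *     change (modify by ljh at 2012-11-04)
 * </pre>
 * 
 * @author jianghang 2012-4-18 04:53:15 PM
 * @version 4.0.2
 */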
public class DatabaseExtractor extends AbstractExtractor<DbBatch> implements InitializingBean, DisposableBean {

    // Base name handed to the pool's thread factory and restored on a thread after each worker run.
    private static final String WORKER_NAME = "DataBaseExtractor";
    // Thread-name pattern used while a worker runs: pipeline id, pipeline name, then WORKER_NAME.
    private static final String WORKER_NAME_FORMAT = "pipelineId = %s , pipelineName = %s , " + WORKER_NAME;
    private static final Logger logger = LoggerFactory.getLogger(DatabaseExtractor.class);
    // Pool size used until setPoolSize/adjustPoolSize supplies another value.
    private static final int DEFAULT_POOL_SIZE = 5;
    // Maximum number of query attempts made by DatabaseExtractWorker.select.
    private static final int retryTimes = 3;
    // Size used for both the core and the maximum thread count of the executor.
    private int poolSize = DEFAULT_POOL_SIZE;
    // Created in afterPropertiesSet() and shut down in destroy().
    private ExecutorService executor;

    /**
     * Completes the row events of a batch with values re-read from the source database.
     *
     * <p>
     * DDL events are skipped entirely. For every other event the method decides whether a database
     * lookup is required: the pipeline or the event asks for media consistency, an Oracle source
     * delivered no updated columns, or the pipeline is in row mode and the event does not carry every
     * table column. Lookups are only issued for insert and update events and run as
     * {@link DatabaseExtractWorker} tasks on the shared executor. Once all tasks have finished, events
     * whose item was flagged for filtering are removed from the batch.
     *
     * @param dbBatch batch whose row data is completed in place; must not be null
     * @throws ExtractException if a lookup task fails or the calling thread is interrupted while
     *             waiting; unfinished tasks are cancelled before the exception is thrown and, in the
     *             interrupted case, the thread's interrupt status is restored
     */
    @Override
    public void extract(DbBatch dbBatch) throws ExtractException {
        Assert.notNull(dbBatch);
        Assert.notNull(dbBatch.getRowBatch());

        // Pipeline-level switches
        Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
        boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
        boolean isRow = pipeline.getParameters().getSyncMode().isRow();
        // Follow the extract pool size configured on the pipeline
        adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
        ExecutorCompletionService<Object> completionService = new ExecutorCompletionService<Object>(executor);

        List<DataItem> items = new ArrayList<DataItem>();
        List<Future> futures = new ArrayList<Future>();
        List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
        for (EventData eventData : eventDatas) {
            if (eventData.getEventType().isDdl()) {
                continue;
            }

            DataItem item = new DataItem(eventData);
            // Media consistency requested by the pipeline or by the individual event
            boolean flag = mustDb
                    || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia());

            // Special case: an Oracle source that delivered no updated columns must be read back
            if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
                DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
                if (dataMedia.getSource().getType().isOracle()) {
                    flag = true;
                    eventData.setRemedy(true);// remembered so a missing row can be filtered later
                }
            }

            if (isRow && !flag) {
                // Row mode: read back when the event does not carry every table column
                flag = checkNeedDbForRowMode(pipeline, eventData);
            }

            if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {
                Future<Object> future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null);
                if (future.isDone()) {
                    // Already finished right after submit (e.g. executed by the caller because the
                    // pool was saturated): surface a failure immediately instead of queueing more work
                    try {
                        future.get();
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();// keep the interrupt visible to the caller
                        cancel(futures);
                        throw new ExtractException(e);
                    } catch (ExecutionException e) {
                        cancel(futures);
                        throw new ExtractException(e);
                    }
                }

                futures.add(future);
            }

            items.add(item);
        }

        // Wait until every submitted task has completed, stopping at the first failure
        ExtractException exception = null;
        int index = 0;
        while (index < futures.size()) {
            try {
                Future<Object> future = completionService.take();
                future.get();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();// keep the interrupt visible to the caller
                exception = new ExtractException(e);
                break;
            } catch (ExecutionException e) {
                exception = new ExtractException(e);
                break;
            }

            index++;
        }

        if (index < futures.size()) {
            // Not every task completed: cancel the remainder and report the failure
            cancel(futures);
            throw exception;
        }

        // All lookups succeeded: drop the events the workers flagged for filtering
        for (DataItem item : items) {
            if (item.isFilter()) {
                eventDatas.remove(item.getEventData());
            }
        }
    }

    /**
     * Tells whether a row-mode event has to be completed from the database.
     *
     * @param pipeline pipeline the event belongs to
     * @param eventData event to inspect
     * @return {@code false} when the number of columns plus keys carried by the event equals the
     *         column count of the source table, {@code true} otherwise
     */
    private boolean checkNeedDbForRowMode(Pipeline pipeline, EventData eventData) {
        // Resolve the source table definition for the event
        DataMedia media = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
        DbDialect dialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) media.getSource());
        Table sourceTable = dialect.findTable(eventData.getSchemaName(), eventData.getTableName());

        int tableColumnCount = sourceTable.getColumnCount();
        int carriedColumnCount = eventData.getColumns().size() + eventData.getKeys().size();
        return tableColumnCount != carriedColumnCount;
    }

    /**
     * Cancels, with interruption, every task in the list that has not completed yet.
     *
     * @param futures tasks submitted so far
     */
    private void cancel(List<Future> futures) {
        for (Future pending : futures) {
            if (!pending.isDone()) {
                pending.cancel(true);// interrupt the worker if it is running
            }
        }
    }

    /**
     * Resizes the worker pool when the requested size differs from the current one.
     *
     * <p>
     * Core and maximum size are changed in an order that never makes the core size exceed the maximum
     * size: {@link ThreadPoolExecutor#setCorePoolSize(int)} rejects such a value on newer JDKs, and
     * {@link ThreadPoolExecutor#setMaximumPoolSize(int)} rejects a maximum below the core size. The
     * remembered pool size is only updated after the executor accepted the new value.
     *
     * @param newPoolSize requested number of worker threads
     * @throws IllegalArgumentException if the executor rejects the value (for example a non-positive size)
     */
    private void adjustPoolSize(int newPoolSize) {
        if (newPoolSize == poolSize) {
            return;
        }

        if (executor instanceof ThreadPoolExecutor) {
            ThreadPoolExecutor pool = (ThreadPoolExecutor) executor;
            if (newPoolSize > pool.getMaximumPoolSize()) {
                // Growing: raise the ceiling first so the core size stays within it
                pool.setMaximumPoolSize(newPoolSize);
                pool.setCorePoolSize(newPoolSize);
            } else {
                // Shrinking (or equal): lower the core size first so the maximum never drops below it
                pool.setCorePoolSize(newPoolSize);
                pool.setMaximumPoolSize(newPoolSize);
            }
        }

        poolSize = newPoolSize;
    }

    /**
     * Creates the worker pool: a fixed number of threads ({@code poolSize}), a bounded queue holding
     * four times that many tasks, and a caller-runs policy for tasks that do not fit into the queue.
     */
    public void afterPropertiesSet() throws Exception {
        int threads = poolSize;
        ArrayBlockingQueue<Runnable> backlog = new ArrayBlockingQueue<Runnable>(threads * 4);
        NamedThreadFactory threadFactory = new NamedThreadFactory(WORKER_NAME);
        ThreadPoolExecutor.CallerRunsPolicy whenSaturated = new ThreadPoolExecutor.CallerRunsPolicy();

        executor = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS, backlog, threadFactory,
                whenSaturated);
    }

    /**
     * Stops the worker pool immediately via {@link ExecutorService#shutdownNow()}. Does nothing when
     * the pool was never created, so calling this before {@code afterPropertiesSet()} is harmless.
     */
    public void destroy() throws Exception {
        if (executor != null) {
            executor.shutdownNow();
        }
    }

    /**
     * Pairs one event with a flag telling whether the event should be removed from the batch once
     * extraction has finished. The flag starts out as {@code false}.
     */
    class DataItem {

        private boolean filter = false;
        private EventData eventData;

        public DataItem(EventData eventData) {
            this.eventData = eventData;
        }

        /** @return {@code true} when the wrapped event should be dropped from the batch */
        public boolean isFilter() {
            return filter;
        }

        public void setFilter(boolean filter) {
            this.filter = filter;
        }

        /** @return the wrapped event */
        public EventData getEventData() {
            return eventData;
        }

        public void setEventData(EventData eventData) {
            this.eventData = eventData;
        }

    }

    /**
     * Task that performs the database lookup for a single event.
     * 
     * @author jianghang 2012-4-19 05:14:18 PM
     * @version 4.0.2
     */
    class DatabaseExtractWorker implements Runnable {

        // Initial capacity of the StringBuilder used when dumping event columns
        private final int event_default_capacity = 1024;
        // MessageFormat pattern for log dumps: {0} pair id, {1} table id, {2} keys, {3} sql
        private String eventData_format = null;
        private final String SEP = SystemUtils.LINE_SEPARATOR;

        private Pipeline pipeline;
        private DataItem item;
        private EventData eventData;
        {
            // Assemble the dump pattern line by line, each line terminated by the platform separator
            StringBuilder pattern = new StringBuilder();
            pattern.append("-----------------").append(SEP);
            pattern.append("- PairId: {0} , TableId: {1} ").append(SEP);
            pattern.append("-----------------").append(SEP);
            pattern.append("---START").append(SEP);
            pattern.append("---Pks").append(SEP);
            pattern.append("{2}").append(SEP);
            pattern.append("---Sql").append(SEP);
            pattern.append("{3}").append(SEP);
            pattern.append("---END").append(SEP);
            eventData_format = pattern.toString();
        }

        /**
         * Creates a worker for one item.
         *
         * @param pipeline pipeline the event belongs to
         * @param item item whose event is looked up; the event is cached from {@code item.getEventData()}
         */
        public DatabaseExtractWorker(Pipeline pipeline, DataItem item) {
            this.pipeline = pipeline;
            this.item = item;
            this.eventData = item.getEventData();
        }

        /**
         * Reads the current column values of the event's row from the source database and merges them
         * into the event.
         *
         * <p>
         * The row is selected by the event's keys. When the pipeline or the event is in row mode, or an
         * Oracle source delivered no updated columns, all view/non-key columns are read; otherwise only
         * the updated columns are. If the row no longer exists, the item is flagged for filtering when
         * the event is a remedy event or the pipeline skips missing rows, unless the old keys differ
         * from the current keys. If the row exists, the values read back replace the same-named columns
         * of the event and every one of them is marked as updated.
         *
         * <p>
         * While running, the thread carries a pipeline-specific name and MDC entry; both are reset in
         * any case when the method returns.
         */
        public void run() {
            try {
                MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipeline.getId()));
                Thread.currentThread()
                        .setName(String.format(WORKER_NAME_FORMAT, pipeline.getId(), pipeline.getName()));

                // Resolve the source table and the key columns used in the WHERE clause
                DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
                DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(),
                        (DbMediaSource) dataMedia.getSource());
                Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName());
                TableData keyTableData = buildTableData(table, eventData.getKeys());

                // Oracle: take the key types from the table definition
                if (dbDialect instanceof OracleDialect) {
                    keyTableData.columnTypes = getOraclePkTypes(table, keyTableData.columnNames);
                }

                boolean needAll = pipeline.getParameters().getSyncMode().isRow()
                        || (eventData.getSyncMode() != null && eventData.getSyncMode().isRow());

                // Special case: an Oracle source that delivered no updated columns needs every column
                needAll |= CollectionUtils.isEmpty(eventData.getUpdatedColumns())
                        && dataMedia.getSource().getType().isOracle();

                List<DataMediaPair> mediaParis = ConfigHelper.findDataMediaPairByMediaId(pipeline,
                        dataMedia.getId());
                List<String> viewColumnNames = buildMaxColumnsFromColumnPairs(mediaParis, eventData.getKeys());

                // TODO: in row mode the lookup could be skipped when the event already carries every
                // required column (see the unused checkNeedDbForRowMode(Table, List, EventData)).

                // modified by ljh at 2012-11-04: every column read back is marked update=true
                TableData columnTableData = buildTableData(table, eventData.getUpdatedColumns(), needAll,
                        viewColumnNames);

                if (columnTableData.columnNames.length == 0) {
                    // Nothing to read back
                    return;
                }

                List<String> newColumnValues = select(dbDialect, eventData.getSchemaName(),
                        eventData.getTableName(), keyTableData, columnTableData);

                if (newColumnValues == null) {
                    // Row is missing in the database. Filter the event only when
                    // a. it is a remedy event, or
                    // b. the pipeline is configured to skip missing rows,
                    // and never when the key itself was changed by the event.
                    boolean needFilter = eventData.isRemedy() || pipeline.getParameters().getSkipNoRow();
                    item.setFilter(needFilter && !isKeyChanged());
                    return;
                }

                // Build the columns read back from the database
                List<EventColumn> newEventColumns = new ArrayList<EventColumn>();
                for (int i = 0; i < newColumnValues.size(); i++) {
                    EventColumn column = new EventColumn();
                    column.setIndex(columnTableData.indexs[i]);
                    column.setColumnName(columnTableData.columnNames[i]);
                    column.setColumnType(columnTableData.columnTypes[i]);
                    column.setNull(newColumnValues.get(i) == null);
                    column.setColumnValue(newColumnValues.get(i));
                    column.setUpdate(true);
                    newEventColumns.add(column);
                }

                // Keep the event's own columns that were not read back
                for (EventColumn column : eventData.getColumns()) {
                    boolean override = false;
                    for (EventColumn newEventColumn : newEventColumns) {
                        if (StringUtils.equalsIgnoreCase(newEventColumn.getColumnName(),
                                column.getColumnName())) {
                            override = true;
                            break;
                        }
                    }

                    if (!override) {
                        newEventColumns.add(column);
                    }
                }

                Collections.sort(newEventColumns, new EventColumnIndexComparable());
                eventData.setColumns(newEventColumns);
            } catch (InterruptedException e) {
                // Thrown by select() when the thread is interrupted (e.g. the task was cancelled);
                // the lookup is simply abandoned.
            } finally {
                Thread.currentThread().setName(WORKER_NAME);
                MDC.remove(OtterConstants.splitPipelineLogFileKey);
            }
        }

        /**
         * Compares the event's old keys with its current keys position by position.
         *
         * @return {@code true} when old keys are present and differ from the current keys in number or
         *         in any position; {@code false} when there are no old keys or all of them are equal
         */
        private boolean isKeyChanged() {
            List<EventColumn> oldKeys = eventData.getOldKeys();
            if (CollectionUtils.isEmpty(oldKeys)) {
                return false;
            }

            List<EventColumn> keys = eventData.getKeys();
            if (keys == null || keys.size() != oldKeys.size()) {
                return true;
            }

            for (int i = 0; i < oldKeys.size(); i++) {
                if (!oldKeys.get(i).equals(keys.get(i))) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Collects the union of source column names configured in the column pairs of all media pairs,
         * leaving out the key columns (compared by lower-cased name).
         *
         * @param mediaPairs media pairs defined for the source media
         * @param pks key columns of the event
         * @return the collected column names, or an empty list as soon as one media pair has no column
         *         pairs or uses exclude mode, i.e. no view restriction applies
         */
        private List<String> buildMaxColumnsFromColumnPairs(List<DataMediaPair> mediaPairs, List<EventColumn> pks) {
            Set<String> pkNames = new HashSet<String>();
            for (EventColumn pk : pks) {
                pkNames.add(StringUtils.lowerCase(pk.getColumnName()));
            }

            Set<String> union = new HashSet<String>();
            for (DataMediaPair mediaPair : mediaPairs) {
                List<ColumnPair> columnPairs = mediaPair.getColumnPairs();
                boolean unrestricted = CollectionUtils.isEmpty(columnPairs)
                        || mediaPair.getColumnPairMode().isExclude();
                if (unrestricted) {
                    // 1. no column pairs defined
                    // 2. exclude mode
                    // either way no view is applied
                    return new ArrayList<String>();
                }

                for (ColumnPair columnPair : columnPairs) {
                    String sourceName = columnPair.getSourceColumn().getName();
                    if (!pkNames.contains(StringUtils.lowerCase(sourceName))) {
                        union.add(sourceName);
                    }
                }
            }

            return new ArrayList<String>(union);
        }

        /**
         * Selects the requested columns of the row identified by the key values, trying up to
         * {@code retryTimes} times when the query throws.
         *
         * @param dbDialect dialect supplying the SQL template and the JdbcTemplate
         * @param schemaName schema of the table
         * @param tableName name of the table
         * @param keyTableData key column names, values and types used to locate the row
         * @param columnTableData names and types of the columns to read
         * @return the values of the first matching row as strings, or {@code null} when no row matches
         * @throws InterruptedException if the current thread is interrupted before an attempt
         * @throws RuntimeException if every attempt failed; the last failure is attached as cause
         */
        private List<String> select(DbDialect dbDialect, String schemaName, String tableName,
                TableData keyTableData, TableData columnTableData) throws InterruptedException {
            String selectSql = dbDialect.getSqlTemplate().getSelectSql(schemaName, tableName,
                    keyTableData.columnNames, columnTableData.columnNames);
            Exception lastFailure = null;
            for (int attempt = 1; attempt <= retryTimes; attempt++) {
                // Stop retrying as soon as the task has been interrupted
                if (Thread.currentThread().isInterrupted()) {
                    throw new InterruptedException();
                }

                try {
                    List<List<String>> rows = dbDialect.getJdbcTemplate().query(selectSql,
                            keyTableData.columnValues, keyTableData.columnTypes,
                            new RowDataMapper(columnTableData.columnTypes));
                    if (!CollectionUtils.isEmpty(rows)) {
                        return rows.get(0);
                    }

                    logger.warn("the mediaName = {}.{} not has rowdate in db \n {}",
                            new Object[] { schemaName, tableName, dumpEventData(eventData, selectSql) });
                    return null;
                } catch (Exception e) {
                    lastFailure = e;
                    logger.warn("retry [" + attempt + "] failed", e);
                }
            }

            throw new RuntimeException("db extract failed , data:\n " + dumpEventData(eventData, selectSql),
                    lastFailure);
        }

        /**
         * Looks up the JDBC type codes of the given key columns in the table definition.
         *
         * @param table table definition
         * @param pkNames key column names, matched case-insensitively
         * @return the type code of every table column matching a key name, in key order
         */
        private int[] getOraclePkTypes(Table table, String[] pkNames) {
            Column[] tableColumns = table.getColumns();
            List<Integer> matchedTypes = new ArrayList<Integer>();
            for (String pkName : pkNames) {
                for (int c = 0; c < tableColumns.length; c++) {
                    Column candidate = tableColumns[c];
                    if (candidate.getName().equalsIgnoreCase(pkName)) {
                        matchedTypes.add(Integer.valueOf(candidate.getTypeCode()));
                    }
                }
            }

            // Unbox into a primitive array
            int[] types = new int[matchedTypes.size()];
            int position = 0;
            for (Integer type : matchedTypes) {
                types[position++] = type.intValue();
            }
            return types;
        }

        /**
         * Currently unused variant of the row-mode check that takes view columns into account.
         *
         * @param table table definition
         * @param viewColumns column names defined by the view; empty when no view applies
         * @param eventData event to inspect
         * @return with a view: {@code true} when the event's columns differ from the view in number or
         *         contain a name the view does not list; without a view: {@code true} when the event's
         *         columns plus keys do not add up to the table's column count
         */
        @SuppressWarnings("unused")
        private boolean checkNeedDbForRowMode(Table table, List<String> viewColumns, EventData eventData) {
            if (viewColumns.size() == 0) {
                // No view: the event must carry every table column
                int tableColumnCount = table.getColumnCount();
                int carriedColumnCount = eventData.getColumns().size() + eventData.getKeys().size();
                return tableColumnCount != carriedColumnCount;
            }

            if (viewColumns.size() != eventData.getColumns().size()) {
                return true;
            }

            // Every event column has to be part of the view
            for (EventColumn carried : eventData.getColumns()) {
                if (!viewColumns.contains(carried.getColumnName())) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Builds the key data (position, name, type and SQL value) used to locate the row.
         *
         * <p>
         * Each key is matched case-insensitively against the table columns; the recorded index is the
         * position of the matching column within {@code table.getColumns()}.
         *
         * @param table table definition
         * @param keys key columns of the event
         * @return table data with one entry per key, in key order
         * @throws ExtractException if a key has no matching table column
         */
        private TableData buildTableData(Table table, List<EventColumn> keys) {
            Column[] tableColumns = table.getColumns();

            TableData data = new TableData();
            data.indexs = new int[keys.size()];
            data.columnNames = new String[keys.size()];
            data.columnTypes = new int[keys.size()];
            data.columnValues = new Object[keys.size()];

            int i = 0;
            for (EventColumn keyColumn : keys) {
                // The position is counted from the first table column for every key
                for (int index = 0; index < tableColumns.length; index++) {
                    Column tableColumn = tableColumns[index];
                    if (StringUtils.equalsIgnoreCase(keyColumn.getColumnName(), tableColumn.getName())) {
                        data.indexs[i] = index;
                        data.columnNames[i] = tableColumn.getName();
                        data.columnTypes[i] = tableColumn.getTypeCode();
                        data.columnValues[i] = SqlUtils.stringToSqlValue(keyColumn.getColumnValue(),
                                tableColumn.getTypeCode(), tableColumn.isRequired(), false);

                        i++;
                        break;
                    }
                }
            }

            if (i != keys.size()) {
                throw new ExtractException(
                        "keys is not found in table " + table.toString() + " keys : " + dumpEventColumn(keys));
            }
            return data;
        }

        /**
         * Builds the description (position, name and type) of the columns to read back.
         *
         * <ul>
         * <li>{@code needAll} with view columns: every table column whose name is listed in the view,
         * matched case-insensitively like the other name comparisons in this class</li>
         * <li>{@code needAll} without view columns: every non-primary-key table column</li>
         * <li>otherwise: the table columns matching the given event columns</li>
         * </ul>
         *
         * <p>
         * The arrays of the result are sized to the columns actually found, so they never contain
         * unfilled slots. {@code columnValues} is allocated but left empty.
         *
         * @param table table definition
         * @param columns updated columns of the event; only consulted when {@code needAll} is false
         * @param needAll whether all view/non-key columns are read instead of the updated ones
         * @param viewColumnNames column names defined by the view; empty when no view applies
         * @return table data in table-column order ({@code needAll}) or event-column order
         * @throws ExtractException if {@code needAll} is false and an event column has no matching
         *             table column
         */
        private TableData buildTableData(Table table, List<EventColumn> columns, boolean needAll,
                List<String> viewColumnNames) {
            Column[] tableColumns = table.getColumns();
            List<Integer> selectedIndexes = new ArrayList<Integer>();
            List<Column> selectedColumns = new ArrayList<Column>();

            if (needAll) {
                boolean hasView = viewColumnNames.size() != 0;
                for (int index = 0; index < tableColumns.length; index++) {
                    Column tableColumn = tableColumns[index];
                    // With a view only the listed columns are read, otherwise every non-key column
                    boolean wanted = hasView ? containsIgnoreCase(viewColumnNames, tableColumn.getName())
                            : !tableColumn.isPrimaryKey();
                    if (wanted) {
                        selectedIndexes.add(Integer.valueOf(index));
                        selectedColumns.add(tableColumn);
                    }
                }
            } else {
                for (EventColumn column : columns) {
                    for (int index = 0; index < tableColumns.length; index++) {
                        Column tableColumn = tableColumns[index];
                        if (StringUtils.equalsIgnoreCase(column.getColumnName(), tableColumn.getName())) {
                            selectedIndexes.add(Integer.valueOf(index));
                            selectedColumns.add(tableColumn);
                            break;
                        }
                    }
                }

                if (selectedColumns.size() != columns.size()) {
                    throw new ExtractException("columns is not found in table " + table.toString() + " columns : "
                            + dumpEventColumn(columns));
                }
            }

            int size = selectedColumns.size();
            TableData data = new TableData();
            data.indexs = new int[size];
            data.columnNames = new String[size];
            data.columnTypes = new int[size];
            data.columnValues = new Object[size];
            for (int i = 0; i < size; i++) {
                Column selected = selectedColumns.get(i);
                data.indexs[i] = selectedIndexes.get(i).intValue();
                data.columnNames[i] = selected.getName();
                data.columnTypes[i] = selected.getTypeCode();
            }

            return data;
        }

        /**
         * Tells whether the list contains the candidate name, ignoring case.
         */
        private boolean containsIgnoreCase(List<String> names, String candidate) {
            for (String name : names) {
                if (StringUtils.equalsIgnoreCase(name, candidate)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Renders the event's pair id, table id, keys and the select statement with the dump pattern,
         * for use in log and exception messages.
         */
        private String dumpEventData(EventData eventData, String selectSql) {
            Object[] arguments = { eventData.getPairId(), eventData.getTableId(),
                    dumpEventColumn(eventData.getKeys()), "\t" + selectSql };
            return MessageFormat.format(eventData_format, arguments);
        }

        /**
         * Renders the columns one per line, each prefixed with a tab; lines are joined with the
         * platform line separator and there is no trailing separator.
         */
        private String dumpEventColumn(List<EventColumn> columns) {
            StringBuilder out = new StringBuilder(event_default_capacity);
            boolean first = true;
            for (EventColumn column : columns) {
                if (!first) {
                    out.append(SEP);
                }
                out.append("\t").append(column.toString());
                first = false;
            }
            return out.toString();
        }

    }

    /**
     * Parallel arrays describing a set of table columns; entry {@code i} of every array refers to the
     * same column.
     */
    class TableData {

        // Position of each column within the table definition's column array
        int[] indexs;
        // Column names as declared in the table definition
        String[] columnNames;
        // Type codes taken from the table definition
        int[] columnTypes;
        // SQL values bound to the columns; filled for key data only
        Object[] columnValues;
    }

    /**
     * Row mapper that converts the leading columns of a result row into strings, using the type codes
     * given at construction time (one per selected column, in select order).
     */
    class RowDataMapper implements RowMapper {

        private int[] columnTypes;

        public RowDataMapper(int[] columnTypes) {
            this.columnTypes = columnTypes;
        }

        /**
         * @return a {@code List<String>} holding one value per configured column type
         * @throws ExtractException if a column value cannot be converted
         */
        public Object mapRow(ResultSet rs, int rowNum) throws SQLException {
            List<String> values = new ArrayList<String>();
            int position = 0;// JDBC column positions start at 1
            for (int columnType : columnTypes) {
                position++;
                try {
                    values.add(SqlUtils.sqlValueToString(rs, position, columnType));
                } catch (Exception e) {
                    throw new ExtractException("ERROR ## get columnName has an error", e);
                }
            }
            return values;
        }
    }

    // ============================ setter / getter =========================

    /**
     * Sets the pool size used when the executor is created in {@code afterPropertiesSet()}. This
     * setter only stores the value; it does not resize an executor that already exists.
     */
    public void setPoolSize(int poolSize) {
        this.poolSize = poolSize;
    }

}