Java tutorial
/* * Copyright (C) 2010-2101 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.alibaba.otter.node.etl.extract.extractor; import java.sql.ResultSet; import java.sql.SQLException; import java.text.MessageFormat; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.SystemUtils; import org.apache.ddlutils.model.Column; import org.apache.ddlutils.model.Table; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; import org.springframework.beans.factory.DisposableBean; import org.springframework.beans.factory.InitializingBean; import org.springframework.jdbc.core.RowMapper; import org.springframework.util.Assert; import org.springframework.util.CollectionUtils; import com.alibaba.otter.node.etl.OtterConstants; import com.alibaba.otter.node.etl.common.db.dialect.DbDialect; import com.alibaba.otter.node.etl.common.db.dialect.oracle.OracleDialect; import com.alibaba.otter.node.etl.common.db.utils.SqlUtils; import com.alibaba.otter.node.etl.extract.exceptions.ExtractException; import com.alibaba.otter.shared.common.model.config.ConfigHelper; import com.alibaba.otter.shared.common.model.config.data.ColumnPair; import com.alibaba.otter.shared.common.model.config.data.DataMedia; import com.alibaba.otter.shared.common.model.config.data.DataMediaPair; import com.alibaba.otter.shared.common.model.config.data.db.DbMediaSource; import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline; import com.alibaba.otter.shared.common.utils.thread.NamedThreadFactory; import com.alibaba.otter.shared.etl.model.DbBatch; import com.alibaba.otter.shared.etl.model.EventColumn; import com.alibaba.otter.shared.etl.model.EventColumnIndexComparable; import com.alibaba.otter.shared.etl.model.EventData; /** * ??? , ? {@linkplain DatabaseExtractWorker} * * <pre> * * 1. ??? = ( ? / (poolsize + 1) ) * ?? * 2. ????(?) * 3. {@linkplain DatabaseExtractWorker}????Thread.currentThread().isInterrupted(),dbcpdriver?? * 4. ??????update=true??????update=true???? (modify by ljh at 2012-11-04) * </pre> * * @author jianghang 2012-4-18 ?04:53:15 * @version 4.0.2 */ public class DatabaseExtractor extends AbstractExtractor<DbBatch> implements InitializingBean, DisposableBean { private static final String WORKER_NAME = "DataBaseExtractor"; private static final String WORKER_NAME_FORMAT = "pipelineId = %s , pipelineName = %s , " + WORKER_NAME; private static final Logger logger = LoggerFactory.getLogger(DatabaseExtractor.class); private static final int DEFAULT_POOL_SIZE = 5; private static final int retryTimes = 3; private int poolSize = DEFAULT_POOL_SIZE; private ExecutorService executor; @Override public void extract(DbBatch dbBatch) throws ExtractException { Assert.notNull(dbBatch); Assert.notNull(dbBatch.getRowBatch()); // ?? Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId()); boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia(); boolean isRow = pipeline.getParameters().getSyncMode().isRow();// ??? // ?? adjustPoolSize(pipeline.getParameters().getExtractPoolSize()); // Extractor? ExecutorCompletionService completionService = new ExecutorCompletionService(executor); // ??? ExtractException exception = null; // ?? List<DataItem> items = new ArrayList<DataItem>(); List<Future> futures = new ArrayList<Future>(); List<EventData> eventDatas = dbBatch.getRowBatch().getDatas(); for (EventData eventData : eventDatas) { if (eventData.getEventType().isDdl()) { continue; } DataItem item = new DataItem(eventData); // row??????row??? boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia()); // ?case, oracle erosa?????? if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) { DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId()); if (dataMedia.getSource().getType().isOracle()) { flag |= true; eventData.setRemedy(true);// ???erosa????? } } if (isRow && !flag) { // ????? // view?? flag = checkNeedDbForRowMode(pipeline, eventData); } if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {// ???? Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null); // ?? if (future.isDone()) { // ?CallerRun???? try { future.get(); } catch (InterruptedException e) { cancel(futures);// ?? throw new ExtractException(e); } catch (ExecutionException e) { cancel(futures); // ?? throw new ExtractException(e); } } futures.add(future);// } items.add(item);// ? } // ? int index = 0; while (index < futures.size()) { // ?? try { Future future = completionService.take();// ? future.get(); } catch (InterruptedException e) { exception = new ExtractException(e); break;// future } catch (ExecutionException e) { exception = new ExtractException(e); break;// future } index++; } if (index < futures.size()) { // ???cancel????? cancel(futures); throw exception; } else { // ?, ???? for (int i = 0; i < items.size(); i++) { DataItem item = items.get(i); if (item.filter) { // ??????? eventDatas.remove(item.getEventData()); } } } } private boolean checkNeedDbForRowMode(Pipeline pipeline, EventData eventData) { // ??? DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId()); DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource()); Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName()); if (table.getColumnCount() == eventData.getColumns().size() + eventData.getKeys().size()) { return false; } else { return true; } } // ?? private void cancel(List<Future> futures) { for (int i = 0; i < futures.size(); i++) { Future future = futures.get(i); if (future.isDone() == false) { future.cancel(true);// ?? } } } // private void adjustPoolSize(int newPoolSize) { if (newPoolSize != poolSize) { poolSize = newPoolSize; if (executor instanceof ThreadPoolExecutor) { ThreadPoolExecutor pool = (ThreadPoolExecutor) executor; pool.setCorePoolSize(newPoolSize); pool.setMaximumPoolSize(newPoolSize); } } } public void afterPropertiesSet() throws Exception { executor = new ThreadPoolExecutor(poolSize, poolSize, 0L, TimeUnit.MILLISECONDS, new ArrayBlockingQueue(poolSize * 4), new NamedThreadFactory(WORKER_NAME), new ThreadPoolExecutor.CallerRunsPolicy()); } public void destroy() throws Exception { executor.shutdownNow(); } // ? class DataItem { private EventData eventData; private boolean filter = false; public DataItem(EventData eventData) { this.eventData = eventData; } public EventData getEventData() { return eventData; } public void setEventData(EventData eventData) { this.eventData = eventData; } public boolean isFilter() { return filter; } public void setFilter(boolean filter) { this.filter = filter; } } /** * ??????? * * @author jianghang 2012-4-19 ?05:14:18 * @version 4.0.2 */ class DatabaseExtractWorker implements Runnable { private final int event_default_capacity = 1024; // StringBuilder?? private String eventData_format = null; private final String SEP = SystemUtils.LINE_SEPARATOR; private Pipeline pipeline; private DataItem item; private EventData eventData; { eventData_format = "-----------------" + SEP; eventData_format += "- PairId: {0} , TableId: {1} " + SEP; eventData_format += "-----------------" + SEP; eventData_format += "---START" + SEP; eventData_format += "---Pks" + SEP; eventData_format += "{2}" + SEP; eventData_format += "---Sql" + SEP; eventData_format += "{3}" + SEP; eventData_format += "---END" + SEP; } public DatabaseExtractWorker(Pipeline pipeline, DataItem item) { this.pipeline = pipeline; this.item = item; this.eventData = item.getEventData(); } public void run() { try { MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipeline.getId())); Thread.currentThread() .setName(String.format(WORKER_NAME_FORMAT, pipeline.getId(), pipeline.getName())); // ??? DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId()); DbDialect dbDialect = dbDialectFactory.getDbDialect(pipeline.getId(), (DbMediaSource) dataMedia.getSource()); Table table = dbDialect.findTable(eventData.getSchemaName(), eventData.getTableName()); TableData keyTableData = buildTableData(table, eventData.getKeys()); // oracle? if (dbDialect instanceof OracleDialect) { keyTableData.columnTypes = getOraclePkTypes(table, keyTableData.columnNames); } boolean needAll = pipeline.getParameters().getSyncMode().isRow() || (eventData.getSyncMode() != null && eventData.getSyncMode().isRow()); // ?case, oracle erosa??????? needAll |= CollectionUtils.isEmpty(eventData.getUpdatedColumns()) && dataMedia.getSource().getType().isOracle(); List<DataMediaPair> mediaParis = ConfigHelper.findDataMediaPairByMediaId(pipeline, dataMedia.getId()); List<String> viewColumnNames = buildMaxColumnsFromColumnPairs(mediaParis, eventData.getKeys()); // TODO ? // if (needAll) { // boolean needDb = checkNeedDbForRowMode(table, // viewColumnNames, eventData); // if (needAll && !needDb) {// ???? // item.setFilter(false); // return; // } // } // modified by ljh at 2012-11-04 // ??????update=true?update=false??filter?????? TableData columnTableData = buildTableData(table, eventData.getUpdatedColumns(), needAll, viewColumnNames); if (columnTableData.columnNames.length == 0) { // ???? } else { List<String> newColumnValues = select(dbDialect, eventData.getSchemaName(), eventData.getTableName(), keyTableData, columnTableData); if (newColumnValues == null) { // miss from db // filter=true??. // ???otter??? // item.setFilter(true); // ????? // a. ???? // b. ???? boolean needFilter = eventData.isRemedy() || pipeline.getParameters().getSkipNoRow(); item.setFilter(needFilter); // ???item int index = 0; for (EventColumn oldKey : eventData.getOldKeys()) { if (!oldKey.equals(eventData.getKeys().get(index))) { item.setFilter(false); break; } } } else { // ?? List<EventColumn> newEventColumns = new ArrayList<EventColumn>(); for (int i = 0; i < newColumnValues.size(); i++) { EventColumn column = new EventColumn(); column.setIndex(columnTableData.indexs[i]); column.setColumnName(columnTableData.columnNames[i]); column.setColumnType(columnTableData.columnTypes[i]); column.setNull(newColumnValues.get(i) == null); column.setColumnValue(newColumnValues.get(i)); column.setUpdate(true); newEventColumns.add(column); } // ?columns??? for (EventColumn column : eventData.getColumns()) { boolean override = false; for (EventColumn newEventColumn : newEventColumns) { if (StringUtils.equalsIgnoreCase(newEventColumn.getColumnName(), column.getColumnName())) { override = true; break; } } if (!override) {// newcolumns? newEventColumns.add(column); } } Collections.sort(newEventColumns, new EventColumnIndexComparable()); // ?? eventData.setColumns(newEventColumns); } } } catch (InterruptedException e) { // ignore } finally { Thread.currentThread().setName(WORKER_NAME); MDC.remove(OtterConstants.splitPipelineLogFileKey); } } /** * ??columnPair????? */ private List<String> buildMaxColumnsFromColumnPairs(List<DataMediaPair> mediaPairs, List<EventColumn> pks) { Set<String> allColumns = new HashSet<String>(); Map<String, EventColumn> pkMap = new HashMap<String, EventColumn>(pks.size(), 1f); for (EventColumn pk : pks) { pkMap.put(StringUtils.lowerCase(pk.getColumnName()), pk); } for (DataMediaPair mediaPair : mediaPairs) {// ??? List<ColumnPair> columnPairs = mediaPair.getColumnPairs(); if (CollectionUtils.isEmpty(columnPairs) || mediaPair.getColumnPairMode().isExclude()) { // 1. ? // 2. exclude??????? return new ArrayList<String>(); // ?view // filter? } else { for (ColumnPair columnPair : columnPairs) { String columnName = columnPair.getSourceColumn().getName(); if (!pkMap.containsKey(StringUtils.lowerCase(columnName))) { allColumns.add(columnPair.getSourceColumn().getName());// ? } } } } return new ArrayList<String>(allColumns); } private List<String> select(DbDialect dbDialect, String schemaName, String tableName, TableData keyTableData, TableData columnTableData) throws InterruptedException { String selectSql = dbDialect.getSqlTemplate().getSelectSql(schemaName, tableName, keyTableData.columnNames, columnTableData.columnNames); Exception exception = null; for (int i = 0; i < retryTimes; i++) { if (Thread.currentThread().isInterrupted()) { throw new InterruptedException(); // } try { List<List<String>> result = dbDialect.getJdbcTemplate().query(selectSql, keyTableData.columnValues, keyTableData.columnTypes, new RowDataMapper(columnTableData.columnTypes)); if (CollectionUtils.isEmpty(result)) { logger.warn("the mediaName = {}.{} not has rowdate in db \n {}", new Object[] { schemaName, tableName, dumpEventData(eventData, selectSql) }); return null; } else { return result.get(0); } } catch (Exception e) { exception = e; logger.warn("retry [" + (i + 1) + "] failed", e); } } throw new RuntimeException("db extract failed , data:\n " + dumpEventData(eventData, selectSql), exception); } /** * oracleerosa???jdbc?. */ private int[] getOraclePkTypes(Table table, String[] pkNames) { Column[] columns = table.getColumns(); List<Integer> pkTypes = new ArrayList<Integer>(); for (String pkName : pkNames) { for (Column column : columns) { if (column.getName().equalsIgnoreCase(pkName)) { pkTypes.add(column.getTypeCode()); } } } int[] types = new int[pkTypes.size()]; for (int i = 0; i < types.length; i++) { types[i] = pkTypes.get(i); } return types; } @SuppressWarnings("unused") private boolean checkNeedDbForRowMode(Table table, List<String> viewColumns, EventData eventData) { if (viewColumns.size() != 0) {// if (viewColumns.size() != eventData.getColumns().size()) { return true; } // ??view for (EventColumn column : eventData.getColumns()) { if (!viewColumns.contains(column.getColumnName())) { return true; } } return false; } else { if (table.getColumnCount() == eventData.getColumns().size() + eventData.getKeys().size()) { return false; } else { return true; } } } /** * ?? */ private TableData buildTableData(Table table, List<EventColumn> keys) { Column[] tableColumns = table.getColumns(); TableData data = new TableData(); data.indexs = new int[keys.size()]; data.columnNames = new String[keys.size()]; data.columnTypes = new int[keys.size()]; data.columnValues = new Object[keys.size()]; int i = 0; int index = 0; for (EventColumn keyColumn : keys) { for (Column tableColumn : tableColumns) { if (StringUtils.equalsIgnoreCase(keyColumn.getColumnName(), tableColumn.getName())) { data.indexs[i] = index; data.columnNames[i] = tableColumn.getName(); data.columnTypes[i] = tableColumn.getTypeCode(); data.columnValues[i] = SqlUtils.stringToSqlValue(keyColumn.getColumnValue(), tableColumn.getTypeCode(), tableColumn.isRequired(), false); i++; break; } index++; } } if (i != keys.size()) { throw new ExtractException( "keys is not found in table " + table.toString() + " keys : " + dumpEventColumn(keys)); } return data; } /** * ??? */ private TableData buildTableData(Table table, List<EventColumn> columns, boolean needAll, List<String> viewColumnNames) { Column[] tableColumns = table.getColumns(); List<Column> noPkcolumns = new ArrayList<Column>(); for (Column tableColumn : tableColumns) { if (!tableColumn.isPrimaryKey()) { noPkcolumns.add(tableColumn); } } TableData data = new TableData(); int size = columns.size(); if (needAll) { size = viewColumnNames.size() != 0 ? viewColumnNames.size() : noPkcolumns.size();// view?view?? } data.indexs = new int[size]; data.columnNames = new String[size]; data.columnTypes = new int[size]; data.columnValues = new Object[size]; int i = 0; if (needAll) { int index = 0; if (viewColumnNames.size() != 0) { // for (Column tableColumn : tableColumns) { if (viewColumnNames.contains(tableColumn.getName())) {// ?view data.indexs[i] = index;// data.columnNames[i] = tableColumn.getName(); data.columnTypes[i] = tableColumn.getTypeCode(); i++; } index++; } } else { for (Column tableColumn : tableColumns) { if (!tableColumn.isPrimaryKey()) { data.indexs[i] = index;// data.columnNames[i] = tableColumn.getName(); data.columnTypes[i] = tableColumn.getTypeCode(); i++; } index++; } } } else { for (EventColumn column : columns) { int index = 0; for (Column tableColumn : tableColumns) { if (StringUtils.equalsIgnoreCase(column.getColumnName(), tableColumn.getName())) { data.indexs[i] = index;// data.columnNames[i] = tableColumn.getName(); data.columnTypes[i] = tableColumn.getTypeCode(); i++; break; } index++; } } if (i != columns.size()) { throw new ExtractException("columns is not found in table " + table.toString() + " columns : " + dumpEventColumn(columns)); } } return data; } private String dumpEventData(EventData eventData, String selectSql) { return MessageFormat.format(eventData_format, eventData.getPairId(), eventData.getTableId(), dumpEventColumn(eventData.getKeys()), "\t" + selectSql); } private String dumpEventColumn(List<EventColumn> columns) { StringBuilder builder = new StringBuilder(event_default_capacity); int size = columns.size(); for (int i = 0; i < size; i++) { EventColumn column = columns.get(i); builder.append("\t").append(column.toString()); if (i < columns.size() - 1) { builder.append(SEP); } } return builder.toString(); } } /** * ?? */ class TableData { int[] indexs; String[] columnNames; int[] columnTypes; Object[] columnValues; } /** * ???? */ class RowDataMapper implements RowMapper { private int[] columnTypes; public RowDataMapper(int[] columnTypes) { this.columnTypes = columnTypes; } public Object mapRow(ResultSet rs, int rowNum) throws SQLException { List<String> result = new ArrayList<String>(); for (int i = 0; i < columnTypes.length; i++) { try { String value = SqlUtils.sqlValueToString(rs, i + 1, columnTypes[i]); result.add(value); } catch (Exception e) { throw new ExtractException("ERROR ## get columnName has an error", e); } } return result; } } // ============================ setter / getter ========================= public void setPoolSize(int poolSize) { this.poolSize = poolSize; } }