/*
 * Copyright 2017 StreamSets Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.pipeline.lib.jdbc.multithread;

import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.util.concurrent.RateLimiter;
import com.streamsets.pipeline.api.BatchContext;
import com.streamsets.pipeline.api.ErrorCode;
import com.streamsets.pipeline.api.PushSource;
import com.streamsets.pipeline.api.StageException;
import com.streamsets.pipeline.api.ToErrorContext;
import com.streamsets.pipeline.lib.jdbc.JdbcErrors;
import com.streamsets.pipeline.lib.jdbc.JdbcUtil;
import com.streamsets.pipeline.lib.jdbc.UtilsProvider;
import com.streamsets.pipeline.lib.jdbc.multithread.cache.JdbcTableReadContextInvalidationListener;
import com.streamsets.pipeline.lib.jdbc.multithread.cache.JdbcTableReadContextLoader;
import com.streamsets.pipeline.lib.jdbc.multithread.util.OffsetQueryUtil;
import com.streamsets.pipeline.lib.util.ThreadUtil;
import com.streamsets.pipeline.stage.common.DefaultErrorRecordHandler;
import com.streamsets.pipeline.stage.common.ErrorRecordHandler;
import com.streamsets.pipeline.stage.origin.jdbc.CommonSourceConfigBean;
import com.streamsets.pipeline.stage.origin.jdbc.table.TableJdbcConfigBean;
import com.streamsets.pipeline.stage.origin.jdbc.table.TableJdbcELEvalContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.ZoneId;
import java.util.Calendar;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.TimeZone;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;

public abstract class JdbcBaseRunnable implements Runnable, JdbcRunnable {
  private static final Logger LOG = LoggerFactory.getLogger(JdbcBaseRunnable.class);
  protected static final String JDBC_NAMESPACE_HEADER = "jdbc.";
  private static final long ACQUIRE_TABLE_SLEEP_INTERVAL = 5L;

  static final String THREAD_NAME = "Thread Name";
  public static final String CURRENT_TABLE = "Current Table";
  static final String TABLES_OWNED_COUNT = "Tables Owned";
  static final String STATUS = "Status";
  private static final String LAST_RATE_LIMIT_WAIT_TIME = "Last Rate Limit Wait (sec)";
  public static final String TABLE_METRICS = "Table Metrics for Thread - ";
  public static final String TABLE_JDBC_THREAD_PREFIX = "Table Jdbc Runner - ";
  public static final String PARTITION_ATTRIBUTE = JDBC_NAMESPACE_HEADER + "partition";
  public static final String THREAD_NUMBER_ATTRIBUTE = JDBC_NAMESPACE_HEADER + "threadNumber";

  protected final PushSource.Context context;
  protected final Map<String, String> offsets;
  private final LoadingCache<TableRuntimeContext, TableReadContext> tableReadContextCache;
  private MultithreadedTableProvider tableProvider;
  protected final int threadNumber;
  private final int batchSize;
  protected final TableJdbcConfigBean tableJdbcConfigBean;
  protected final CommonSourceConfigBean commonSourceConfigBean;
  private final TableJdbcELEvalContext tableJdbcELEvalContext;
  private final ConnectionManager connectionManager;
  protected final ErrorRecordHandler errorRecordHandler;
  private final Map<String, Object> gaugeMap;
  protected TableRuntimeContext tableRuntimeContext;
  private long lastQueryIntervalTime;
  protected TableReadContext tableReadContext;
  private int numSQLErrors = 0;
  private SQLException firstSqlException = null;
  private final RateLimiter queryRateLimiter;
  private boolean isReconnect;
  protected final JdbcUtil jdbcUtil;

  private enum Status {
    WAITING_FOR_RATE_LIMIT_PERMIT,
    ACQUIRED_RATE_LIMIT_PERMIT,
    QUERYING_TABLE,
    GENERATING_BATCH,
    BATCH_GENERATED
  }

  public JdbcBaseRunnable(
      PushSource.Context context,
      int threadNumber,
      int batchSize,
      Map<String, String> offsets,
      MultithreadedTableProvider tableProvider,
      ConnectionManager connectionManager,
      TableJdbcConfigBean tableJdbcConfigBean,
      CommonSourceConfigBean commonSourceConfigBean,
      CacheLoader<TableRuntimeContext, TableReadContext> tableCacheLoader,
      RateLimiter queryRateLimiter
  ) {
    this(
        context,
        threadNumber,
        batchSize,
        offsets,
        tableProvider,
        connectionManager,
        tableJdbcConfigBean,
        commonSourceConfigBean,
        tableCacheLoader,
        queryRateLimiter,
        false
    );
  }

  public JdbcBaseRunnable(
      PushSource.Context context,
      int threadNumber,
      int batchSize,
      Map<String, String> offsets,
      MultithreadedTableProvider tableProvider,
      ConnectionManager connectionManager,
      TableJdbcConfigBean tableJdbcConfigBean,
      CommonSourceConfigBean commonSourceConfigBean,
      CacheLoader<TableRuntimeContext, TableReadContext> tableCacheLoader,
      RateLimiter queryRateLimiter,
      boolean isReconnect
  ) {
    this.jdbcUtil = UtilsProvider.getJdbcUtil();
    this.context = context;
    this.threadNumber = threadNumber;
    this.lastQueryIntervalTime = -1;
    this.batchSize = batchSize;
    this.tableJdbcELEvalContext = new TableJdbcELEvalContext(context, context.createELVars());
    this.offsets = offsets;
    this.tableProvider = tableProvider;
    this.tableJdbcConfigBean = tableJdbcConfigBean;
    this.commonSourceConfigBean = commonSourceConfigBean;
    this.connectionManager = connectionManager;
    this.tableReadContextCache = buildReadContextCache(tableCacheLoader);
    this.errorRecordHandler = new DefaultErrorRecordHandler(context, (ToErrorContext) context);
    this.numSQLErrors = 0;
    this.firstSqlException = null;
    this.isReconnect = isReconnect;

    // Metrics
    this.gaugeMap = context.createGauge(TABLE_METRICS + threadNumber).getValue();

    this.queryRateLimiter = queryRateLimiter;
  }

  public LoadingCache<TableRuntimeContext, TableReadContext> getTableReadContextCache() {
    return tableReadContextCache;
  }

  @Override
  public void run() {
    Thread.currentThread().setName(TABLE_JDBC_THREAD_PREFIX + threadNumber);
    initGaugeIfNeeded();
    while (!context.isStopped()) {
      generateBatchAndCommitOffset(context.startBatch());
    }
  }

  /**
   * Builds the Read Context Cache {@link #tableReadContextCache}
   */
  @SuppressWarnings("unchecked")
  private LoadingCache<TableRuntimeContext, TableReadContext> buildReadContextCache(
      CacheLoader<TableRuntimeContext, TableReadContext> tableCacheLoader
  ) {
    CacheBuilder resultSetCacheBuilder = CacheBuilder.newBuilder()
        .removalListener(new JdbcTableReadContextInvalidationListener());

    if (tableJdbcConfigBean.batchTableStrategy == BatchTableStrategy.SWITCH_TABLES) {
      if (tableJdbcConfigBean.resultCacheSize > 0) {
        resultSetCacheBuilder = resultSetCacheBuilder.maximumSize(tableJdbcConfigBean.resultCacheSize);
      }
    } else {
      resultSetCacheBuilder = resultSetCacheBuilder.maximumSize(1);
    }

    if (tableCacheLoader != null) {
      return resultSetCacheBuilder.build(tableCacheLoader);
    } else {
      return resultSetCacheBuilder.build(new JdbcTableReadContextLoader(
          connectionManager,
          offsets,
          tableJdbcConfigBean.fetchSize,
          tableJdbcConfigBean.quoteChar.getQuoteCharacter(),
          tableJdbcELEvalContext,
          isReconnect
      ));
    }
  }

  /**
   * Initialize the gauge with needed information
   */
  private void initGaugeIfNeeded() {
    gaugeMap.put(THREAD_NAME, Thread.currentThread().getName());
    gaugeMap.put(STATUS, "");
    gaugeMap.put(TABLES_OWNED_COUNT, tableReadContextCache.size());
    gaugeMap.put(CURRENT_TABLE, "");
  }

  private void updateGauge(JdbcBaseRunnable.Status status) {
    gaugeMap.put(STATUS, status.name());
    gaugeMap.put(
        CURRENT_TABLE,
        Optional.ofNullable(tableRuntimeContext).map(TableRuntimeContext::getQualifiedName).orElse("")
    );
    gaugeMap.put(TABLES_OWNED_COUNT, tableProvider.getOwnedTablesQueue().size());
  }

  /**
   * Generate a batch (looping through as many tables as needed) until a batch can be generated
   * and then commit the offset.
   */
  private void generateBatchAndCommitOffset(BatchContext batchContext) {
    int recordCount = 0;
    int eventCount = 0;
    try {
      while (tableRuntimeContext == null) {
        tableRuntimeContext = tableProvider.nextTable(threadNumber);
        if (tableRuntimeContext == null) {
          // Small sleep before trying to acquire a table again, to potentially allow a new
          // partition to be returned to the shared queue or created
          final boolean uninterrupted = ThreadUtil.sleep(ACQUIRE_TABLE_SLEEP_INTERVAL);
          if (!uninterrupted) {
            LOG.trace("Interrupted while trying to acquire table");
          }
          if (!uninterrupted || tableRuntimeContext == null) {
            return;
          }
        }
      }
      updateGauge(JdbcBaseRunnable.Status.QUERYING_TABLE);
      tableReadContext = getOrLoadTableReadContext();
      ResultSet rs = tableReadContext.getResultSet();
      boolean resultSetEndReached = false;
      try {
        updateGauge(JdbcBaseRunnable.Status.GENERATING_BATCH);
        while (recordCount < batchSize) {
          if (rs.isClosed() || !rs.next()) {
            if (rs.isClosed()) {
              LOG.trace("ResultSet is closed");
            }
            resultSetEndReached = true;
            break;
          }
          createAndAddRecord(rs, tableRuntimeContext, batchContext);
          recordCount++;
        }
        LOG.trace("{} records generated", recordCount);
        if (commonSourceConfigBean.enableSchemaChanges) {
          generateSchemaChanges(batchContext);
        }
        tableRuntimeContext.setResultSetProduced(true);

        // Reset numSQLErrors if we are able to read the result set and add records to the batch context
        numSQLErrors = 0;
        firstSqlException = null;

        // If an exception happened, we do not report anything about the no-more-data event.
        // We report noMoreData if either evictTableReadContext is true (result set has no more rows)
        // or the record count is 0.
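        // Note: tableFinished, schemaFinished, and schemaFinishedTables below act as out-parameters
        // that reportDataOrNoMoreData populates; they determine which finished events are generated.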
        final AtomicBoolean tableFinished = new AtomicBoolean(false);
        final AtomicBoolean schemaFinished = new AtomicBoolean(false);
        final List<String> schemaFinishedTables = new LinkedList<>();
        tableProvider.reportDataOrNoMoreData(
            tableRuntimeContext,
            recordCount,
            batchSize,
            resultSetEndReached,
            tableFinished,
            schemaFinished,
            schemaFinishedTables
        );
        if (tableFinished.get()) {
          TableJdbcEvents.createTableFinishedEvent(context, tableRuntimeContext);
          eventCount++;
        }
        if (schemaFinished.get()) {
          TableJdbcEvents.createSchemaFinishedEvent(context, tableRuntimeContext, schemaFinishedTables);
          eventCount++;
        }
      } finally {
        handlePostBatchAsNeeded(resultSetEndReached, recordCount, eventCount, batchContext);
      }
    } catch (SQLException | ExecutionException | StageException | InterruptedException e) {
      LOG.error("Error happened", e);
      // Invalidate cached read contexts in case the connection is closed
      tableReadContextCache.invalidateAll();
      connectionManager.closeConnection();
      // We may have executed the post-batch logic and it errored out
      if (tableRuntimeContext != null) {
        final TableContext table = tableRuntimeContext.getSourceTableContext();
        // If the currently acquired tableContext is no longer a valid candidate,
        // try to re-fetch the table from the table provider
        if (commonSourceConfigBean.allowLateTable && !tableProvider.getActiveRuntimeContexts().containsKey(table)) {
          tableRuntimeContext = null;
        }
      }
      Throwable th = (e instanceof ExecutionException) ? e.getCause() : e;
      if (th instanceof SQLException) {
        handleSqlException((SQLException) th);
      } else if (e instanceof InterruptedException) {
        LOG.error("Thread {} interrupted", gaugeMap.get(THREAD_NAME));
      } else {
        handleStageError(JdbcErrors.JDBC_67, e);
      }
    }
  }

  private void handleSqlException(SQLException sqlE) {
    numSQLErrors++;
    if (numSQLErrors == 1) {
      firstSqlException = sqlE;
    }
    if (numSQLErrors < commonSourceConfigBean.numSQLErrorRetries) {
      LOG.error(
          "SQL Exception happened : {}, will retry. Retries Count : {}, Max Retries Count : {}",
          jdbcUtil.formatSqlException(sqlE),
          numSQLErrors,
          commonSourceConfigBean.numSQLErrorRetries
      );
    } else {
      LOG.error("Maximum SQL Error Retries {} exhausted", commonSourceConfigBean.numSQLErrorRetries);
      handleStageError(JdbcErrors.JDBC_78, firstSqlException);
    }
  }

  private void calculateEvictTableFlag(
      AtomicBoolean evictTableReadContext,
      TableReadContext tableReadContext
  ) {
    if (tableReadContext.isNeverEvict()) {
      evictTableReadContext.set(false);
      return;
    }
    boolean shouldEvict = evictTableReadContext.get();
    boolean isNumberOfBatchesReached = (
        tableJdbcConfigBean.batchTableStrategy == BatchTableStrategy.SWITCH_TABLES
            && tableJdbcConfigBean.numberOfBatchesFromRs > 0
            && tableReadContext.getNumberOfBatches() >= tableJdbcConfigBean.numberOfBatchesFromRs
    );
    evictTableReadContext.set(shouldEvict || isNumberOfBatchesReached);
  }

  /**
   * After a batch is generated, perform needed operations, generate and commit the batch.
   * Evict entries from {@link #tableReadContextCache}, reset {@link #tableRuntimeContext}.
   */
  protected void handlePostBatchAsNeeded(
      boolean resultSetEndReached,
      int recordCount,
      int eventCount,
      BatchContext batchContext
  ) {
    AtomicBoolean shouldEvict = new AtomicBoolean(resultSetEndReached);

    // Only process the batch if there are records or events
    if (recordCount > 0 || eventCount > 0) {
      TableReadContext tableReadContext = tableReadContextCache.getIfPresent(tableRuntimeContext);
      Optional.ofNullable(tableReadContext).ifPresent(readContext -> {
        readContext.setNumberOfBatches(readContext.getNumberOfBatches() + 1);
        LOG.debug(
            "Table {} read {} number of batches from the fetched result set",
            tableRuntimeContext.getQualifiedName(),
            readContext.getNumberOfBatches()
        );
        calculateEvictTableFlag(shouldEvict, tableReadContext);
      });
      updateGauge(JdbcBaseRunnable.Status.BATCH_GENERATED);
      // Process and commit offsets
      if (tableRuntimeContext.isUsingNonIncrementalLoad()) {
        // Process the batch now; the offset commit is handled outside this block
        context.processBatch(batchContext);
      } else {
        // For incremental (normal) mode, the offset was already stored in this map
        // by the specific subclass's createAndAddRecord method
        final String offsetValue = offsets.get(tableRuntimeContext.getOffsetKey());
        context.processBatch(batchContext, tableRuntimeContext.getOffsetKey(), offsetValue);
      }
    }

    if (tableRuntimeContext.isUsingNonIncrementalLoad()) {
      // For non-incremental mode, the offset is simply a singleton map indicating whether it's finished
      final String offsetValue = createNonIncrementalLoadOffsetValue(resultSetEndReached);
      context.commitOffset(tableRuntimeContext.getOffsetKey(), offsetValue);
    }

    // Make sure we close the result set only when there are no more rows in the result set
    if (shouldEvict.get()) {
      // Invalidate so as to fetch a new result set;
      // the result set/statement is closed in the removal listener
      tableReadContextCache.invalidate(tableRuntimeContext);
      tableProvider.releaseOwnedTable(tableRuntimeContext, threadNumber);
      tableRuntimeContext = null;
    } else if (tableJdbcConfigBean.batchTableStrategy == BatchTableStrategy.SWITCH_TABLES
        && !tableRuntimeContext.isUsingNonIncrementalLoad()) {
      tableRuntimeContext = null;
    }

    final List<TableRuntimeContext> removedPartitions = tableProvider.getAndClearRemovedPartitions();
    if (removedPartitions != null && !removedPartitions.isEmpty()) {
      for (TableRuntimeContext partition : removedPartitions) {
        LOG.debug(
            "Removing offset entry for partition {} since it has been removed from the table provider",
            partition.getDescription()
        );
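        // Committing a null offset value removes the stored offset entry for this partition key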
        context.commitOffset(partition.getOffsetKey(), null);
      }
    }
  }

  private static String createNonIncrementalLoadOffsetValue(boolean completed) {
    final Map<String, String> offsets = new HashMap<>();
    offsets.put(TableRuntimeContext.NON_INCREMENTAL_LOAD_OFFSET_COMPLETED_KEY, String.valueOf(completed));
    return OffsetQueryUtil.getSourceKeyOffsetsRepresentation(offsets);
  }

  /**
   * Initialize the {@link TableJdbcELEvalContext} before generating a batch
   */
  private static void initTableEvalContextForProduce(
      TableJdbcELEvalContext tableJdbcELEvalContext,
      TableRuntimeContext tableContext,
      Calendar calendar
  ) {
    tableJdbcELEvalContext.setCalendar(calendar);
    tableJdbcELEvalContext.setTime(calendar.getTime());
    tableJdbcELEvalContext.setTableContext(tableContext);
  }

  /**
   * Wait if needed before a new query is issued. (Does not wait if the result set is already cached.)
   */
  private void waitIfNeeded() throws InterruptedException {
    if (queryRateLimiter != null) {
      updateGauge(Status.WAITING_FOR_RATE_LIMIT_PERMIT);
      double waitTime = queryRateLimiter.acquire();
      updateGauge(Status.ACQUIRED_RATE_LIMIT_PERMIT);
      gaugeMap.put(LAST_RATE_LIMIT_WAIT_TIME, waitTime);
    } else {
      LOG.debug("No query rate limiter in effect; not waiting");
    }
  }

  /**
   * Get or load the {@link TableReadContext} for {@link #tableRuntimeContext}
   */
  private TableReadContext getOrLoadTableReadContext() throws ExecutionException, InterruptedException {
    initTableEvalContextForProduce(
        tableJdbcELEvalContext,
        tableRuntimeContext,
        Calendar.getInstance(TimeZone.getTimeZone(ZoneId.of(tableJdbcConfigBean.timeZoneID)))
    );
    // Check the cache first; only wait for the rate limiter when a new query has to be issued
    TableReadContext tableReadContext = tableReadContextCache.getIfPresent(tableRuntimeContext);
    LOG.trace("Selected table : '{}' for generating records", tableRuntimeContext.getDescription());

    if (tableReadContext == null) {
      // Wait before issuing the query (optimization: wait once per query instead of during each batch)
      waitIfNeeded();
      // Set the time before the query
      initTableEvalContextForProduce(
          tableJdbcELEvalContext,
          tableRuntimeContext,
          Calendar.getInstance(TimeZone.getTimeZone(ZoneId.of(tableJdbcConfigBean.timeZoneID)))
      );
      tableReadContext = tableReadContextCache.get(tableRuntimeContext);
      // Record the query time
      lastQueryIntervalTime = System.currentTimeMillis();
    }
    return tableReadContext;
  }

  /**
   * Handle an exception by routing it to the stage error handler
   */
  private void handleStageError(ErrorCode errorCode, Exception e) {
    String errorMessage = (e instanceof SQLException)
        ? jdbcUtil.formatSqlException((SQLException) e)
        : "Failure Happened";
    LOG.error(errorMessage, e);
    try {
      errorRecordHandler.onError(errorCode, e);
    } catch (StageException se) {
      LOG.error("Error when routing to stage error", se);
      // The way to propagate a stage exception from a runnable to the main source thread
      Throwables.propagate(se);
    }
  }
}
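
/*
 * Usage sketch (illustrative only, not part of the original file): a PushSource
 * implementation typically builds one runnable per configured thread and drives
 * them from an executor until the pipeline stops; run() loops on context.isStopped().
 * The names "runnables" and "executor" below are assumptions for this sketch,
 * not identifiers from this module:
 *
 *   // Assumed setup: "runnables" is a List<JdbcBaseRunnable> built elsewhere
 *   ExecutorService executor = Executors.newFixedThreadPool(runnables.size());
 *   List<Future<?>> futures = new ArrayList<>();
 *   for (Runnable runnable : runnables) {
 *     futures.add(executor.submit(runnable));
 *   }
 *   // On shutdown, wait for each runnable to observe context.isStopped() and exit
 *   for (Future<?> future : futures) {
 *     future.get();
 *   }
 *   executor.shutdown();
 */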