Java tutorial
/* * Copyright (C) 2016 Stratio (http://stratio.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.sqoop.connector.jdbc.oracle; import java.io.Serializable; import java.math.BigDecimal; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.List; import org.apache.log4j.Logger; import org.apache.sqoop.connector.jdbc.oracle.configuration.LinkConfiguration; import org.apache.sqoop.connector.jdbc.oracle.configuration.ToJobConfig; import org.apache.sqoop.connector.jdbc.oracle.configuration.ToJobConfiguration; import org.apache.sqoop.connector.jdbc.oracle.util.OracleConnectionFactory; import org.apache.sqoop.connector.jdbc.oracle.util.OracleQueries; import org.apache.sqoop.connector.jdbc.oracle.util.OracleTable; import org.apache.sqoop.connector.jdbc.oracle.util.OracleTableColumn; import org.apache.sqoop.connector.jdbc.oracle.util.OracleTableColumns; import org.apache.sqoop.connector.jdbc.oracle.util.OracleUtilities; import org.apache.sqoop.connector.jdbc.oracle.util.OracleUtilities.InsertMode; import org.apache.sqoop.connector.jdbc.oracle.util.OracleUtilities.UpdateMode; import org.apache.sqoop.connector.jdbc.oracle.util.OracleVersion; import org.apache.sqoop.job.etl.Loader; import org.apache.sqoop.job.etl.LoaderContext; import org.apache.sqoop.schema.type.Column; import org.joda.time.LocalDateTime; public class OracleJdbcLoader extends Loader<LinkConfiguration, ToJobConfiguration> implements Serializable { private static final Logger LOG = Logger.getLogger(OracleJdbcToDestroyer.class); private long rowsWritten = 0; private LoaderContext context; private Connection connection; private OracleVersion oracleVersion; private OracleTable table; // <- If exporting into a partitioned // table, this table will be unique for // this mapper private OracleTableColumns tableColumns; // <- The columns in the // table we're inserting rows // into private int mapperId; // <- The index of this Hadoop mapper private boolean tableHasMapperRowNumberColumn; // <- Whether the export // table contain the column // SQOOP_MAPPER_ROW private long mapperRowNumber; // <- The 1-based row number being processed // by this mapper. It's inserted into the // "SQOOP_MAPPER_ROW" column private boolean useAppendValuesOracleHint = false; // <- Whether to use the // " /*+APPEND_VALUES*/ " hint // within the Oracle SQL // statement we generate private long numberOfRowsSkipped; // <- The number of rows encountered // during configurePreparedStatement() // that had a NULL value for (one of) the // update columns. This row was therefore // skipped. private String[] updateColumnNames; private int rowsPerBatch; private int rowsPerCommit; private void setupInsert(LinkConfiguration linkConfiguration, ToJobConfiguration jobConfiguration) throws SQLException { // Is each mapper inserting rows into a unique table?... InsertMode insertMode = OracleUtilities.getExportInsertMode(jobConfiguration.toJobConfig, context.getContext()); if (insertMode == InsertMode.ExchangePartition) { Object sysDateTime = OracleUtilities.recallOracleDateTime(context.getContext()); table = OracleUtilities.generateExportTableMapperTableName(mapperId, sysDateTime, null); } else { table = OracleUtilities.decodeOracleTableName(linkConfiguration.connectionConfig.username, jobConfiguration.toJobConfig.tableName); } // Should we use the APPEND_VALUES Oracle hint?... useAppendValuesOracleHint = false; if (insertMode == InsertMode.ExchangePartition) { // NB: "Direct inserts" cannot utilize APPEND_VALUES, otherwise Oracle // will serialize // the N mappers, causing a lot of lock contention. useAppendValuesOracleHint = canUseOracleAppendValuesHint(); } } private void setupUpdate(LinkConfiguration linkConfiguration, ToJobConfiguration jobConfiguration) throws SQLException { UpdateMode updateMode = OracleUtilities.getExportUpdateMode(jobConfiguration.toJobConfig); Object sysDateTime = OracleUtilities.recallOracleDateTime(context.getContext()); table = OracleUtilities.generateExportTableMapperTableName(mapperId, sysDateTime, null); updateColumnNames = OracleUtilities.getExportUpdateKeyColumnNames(jobConfiguration.toJobConfig); if (updateMode == UpdateMode.Merge || updateMode == UpdateMode.Update) { // Should we use the APPEND_VALUES Oracle hint?... useAppendValuesOracleHint = canUseOracleAppendValuesHint(); } } @Override public void load(LoaderContext context, LinkConfiguration linkConfiguration, ToJobConfiguration jobConfiguration) throws Exception { LOG.debug("Running Oracle JDBC connector loader"); this.context = context; //TODO: Mapper ID mapperId = 1; //TODO: Hardcoded values rowsPerBatch = 5000; rowsPerCommit = 5000; // Retrieve the JDBC URL that should be used by this mapper. // We achieve this by modifying the JDBC URL property in the // configuration, prior to the OraOopDBRecordWriter's (ancestral) // constructor using the configuration to establish a connection // to the database - via DBConfiguration.getConnection()... String mapperJdbcUrlPropertyName = OracleUtilities.getMapperJdbcUrlPropertyName(mapperId); // Get this mapper's JDBC URL String mapperJdbcUrl = context.getString(mapperJdbcUrlPropertyName, null); LOG.debug(String.format("Mapper %d has a JDBC URL of: %s", mapperId, mapperJdbcUrl == null ? "<null>" : mapperJdbcUrl)); connection = OracleConnectionFactory.createOracleJdbcConnection( OracleJdbcConnectorConstants.ORACLE_JDBC_DRIVER_CLASS, mapperJdbcUrl, linkConfiguration.connectionConfig.username, linkConfiguration.connectionConfig.password); String thisOracleInstanceName = OracleQueries.getCurrentOracleInstanceName(connection); LOG.info(String.format("This record writer is connected to Oracle via the JDBC URL: \n" + "\t\"%s\"\n" + "\tto the Oracle instance: \"%s\"", connection.toString(), thisOracleInstanceName)); OracleConnectionFactory.initializeOracleConnection(connection, linkConfiguration.connectionConfig); connection.setAutoCommit(false); oracleVersion = OracleQueries.getOracleVersion(connection); if (jobConfiguration.toJobConfig.updateKey == null || jobConfiguration.toJobConfig.updateKey.isEmpty()) { setupInsert(linkConfiguration, jobConfiguration); } else { setupUpdate(linkConfiguration, jobConfiguration); } tableColumns = OracleQueries.getToTableColumns(connection, table, true, false); tableHasMapperRowNumberColumn = tableColumns .findColumnByName(OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_MAPPER_ROW) != null; // Has the user forced the use of APPEND_VALUES either on or off?... useAppendValuesOracleHint = allowUserToOverrideUseOfTheOracleAppendValuesHint(jobConfiguration.toJobConfig, useAppendValuesOracleHint); insertData(); connection.close(); } @Override public long getRowsWritten() { return rowsWritten; } private void insertData() throws Exception { // If using APPEND_VALUES, check the batch size and commit frequency... if (useAppendValuesOracleHint) { if (rowsPerBatch < OracleJdbcConnectorConstants.MIN_APPEND_VALUES_BATCH_SIZE_DEFAULT) { LOG.info(String.format( "The number of rows per batch-insert has been changed from %d " + "to %d. This is in response " + "to the Oracle APPEND_VALUES hint being used.", rowsPerBatch, OracleJdbcConnectorConstants.MIN_APPEND_VALUES_BATCH_SIZE_DEFAULT)); rowsPerBatch = OracleJdbcConnectorConstants.MIN_APPEND_VALUES_BATCH_SIZE_DEFAULT; } // Need to commit after each batch when using APPEND_VALUES if (rowsPerCommit != rowsPerBatch) { LOG.info(String.format("The number of rows to insert per commit has been " + "changed from %d to %d. This is in response " + "to the Oracle APPEND_VALUES hint being used.", rowsPerCommit, rowsPerBatch)); rowsPerCommit = rowsPerBatch; } } mapperRowNumber = 1; String sql = getBatchInsertSqlStatement(useAppendValuesOracleHint ? "/*+APPEND_VALUES*/" : ""); PreparedStatement statement = connection.prepareStatement(sql); Column[] columns = context.getSchema().getColumnsArray(); Object[] array; boolean checkUpdateColumns = false; List<Integer> updateColumnIndexes = null; if (updateColumnNames != null) { checkUpdateColumns = true; updateColumnIndexes = new ArrayList<Integer>(); for (int idx = 0; idx < this.updateColumnNames.length; idx++) { for (int i = 0; i < columns.length; i++) { if (columns[i].getName().equals(updateColumnNames[idx])) { updateColumnIndexes.add(i); } } } } while ((array = context.getDataReader().readArrayRecord()) != null) { if (checkUpdateColumns) { boolean updateKeyValueIsNull = false; for (Integer i : updateColumnIndexes) { Object updateKeyValue = array[i]; if (updateKeyValue == null) { this.numberOfRowsSkipped++; updateKeyValueIsNull = true; break; } } if (updateKeyValueIsNull) { continue; } } rowsWritten++; configurePreparedStatementColumns(statement, columns, array); if (rowsWritten % rowsPerBatch == 0) { statement.executeBatch(); } if (rowsWritten % rowsPerCommit == 0) { connection.commit(); } } if (rowsWritten % rowsPerBatch != 0) { statement.executeBatch(); } connection.commit(); statement.close(); if (numberOfRowsSkipped > 0) { LOG.warn(String.format("%d records were skipped due to a NULL value within one of the " + "update-key column(s).\nHaving a NULL value prevents a record " + "from being able to be matched to a row in the Oracle table.", numberOfRowsSkipped)); } } private String getBatchInsertSqlStatement(String oracleHint) { // String[] columnNames = this.getColumnNames(); StringBuilder sqlNames = new StringBuilder(); StringBuilder sqlValues = new StringBuilder(); /* * NOTE: "this.oracleTableColumns" may contain a different list of columns * than "this.getColumnNames()". This is because: (1) * "this.getColumnNames()" includes columns with data-types that are not * supported by OraOop. (2) "this.oracleTableColumns" includes any * pseudo-columns that we've added to the export table (and don't exist in * the HDFS file being read). For example, if exporting to a partitioned * table (that OraOop created), there are two pseudo-columns we added to * the table to identify the export job and the mapper. */ List<String> columnNamesList = new ArrayList<String>(); for (Column column : context.getSchema().getColumnsList()) { columnNamesList.add(column.getName()); } int colCount = 0; for (int idx = 0; idx < this.tableColumns.size(); idx++) { OracleTableColumn oracleTableColumn = this.tableColumns.get(idx); String columnName = oracleTableColumn.getName(); if (columnNamesList.contains(columnName) || OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_PARTITION.equals(columnName) || OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_SUBPARTITION.equals(columnName) || OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_MAPPER_ROW.equals(columnName)) { // column names... if (colCount > 0) { sqlNames.append("\n,"); } sqlNames.append(columnName); // column values... if (colCount > 0) { sqlValues.append("\n,"); } String pseudoColumnValue = generateInsertValueForPseudoColumn(columnName); String bindVarName = null; if (pseudoColumnValue != null) { bindVarName = pseudoColumnValue; } else if (oracleTableColumn.getOracleType() == OracleQueries.getOracleType("STRUCT")) { if (oracleTableColumn.getDataType().equals(OracleJdbcConnectorConstants.Oracle.URITYPE)) { bindVarName = String.format("urifactory.getUri(%s)", columnNameToBindVariable(columnName)); } //TODO: Date as string? /*} else if (getConf().getBoolean( OraOopConstants.ORAOOP_MAP_TIMESTAMP_AS_STRING, OraOopConstants.ORAOOP_MAP_TIMESTAMP_AS_STRING_DEFAULT)) { if (oracleTableColumn.getOracleType() == OraOopOracleQueries .getOracleType("DATE")) { bindVarName = String.format("to_date(%s, 'yyyy-mm-dd hh24:mi:ss')", columnNameToBindVariable(columnName)); } else if (oracleTableColumn.getOracleType() == OraOopOracleQueries .getOracleType("TIMESTAMP")) { bindVarName = String.format("to_timestamp(%s, 'yyyy-mm-dd hh24:mi:ss.ff')", columnNameToBindVariable(columnName)); } else if (oracleTableColumn.getOracleType() == OraOopOracleQueries .getOracleType("TIMESTAMPTZ")) { bindVarName = String.format( "to_timestamp_tz(%s, 'yyyy-mm-dd hh24:mi:ss.ff TZR')", columnNameToBindVariable(columnName)); } else if (oracleTableColumn.getOracleType() == OraOopOracleQueries .getOracleType("TIMESTAMPLTZ")) { bindVarName = String.format( "to_timestamp_tz(%s, 'yyyy-mm-dd hh24:mi:ss.ff TZR')", columnNameToBindVariable(columnName)); }*/ } if (bindVarName == null) { bindVarName = columnNameToBindVariable(columnName); } sqlValues.append(bindVarName); colCount++; } } String sql = String.format("insert %s into %s\n" + "(%s)\n" + "values\n" + "(%s)\n", oracleHint, this.table.toString(), sqlNames.toString(), sqlValues.toString()); LOG.info("Batch-Mode insert statement:\n" + sql); return sql; } private String generateInsertValueForPseudoColumn(String columnName) { if (columnName.equalsIgnoreCase(OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_PARTITION)) { String partitionValueStr = context .getString(OracleJdbcConnectorConstants.ORAOOP_EXPORT_PARTITION_DATE_VALUE); if (partitionValueStr == null) { throw new RuntimeException("Unable to recall the value of the partition date-time."); } return String.format("to_date('%s', '%s')", partitionValueStr, OracleJdbcConnectorConstants.ORAOOP_EXPORT_PARTITION_DATE_FORMAT); } if (columnName.equalsIgnoreCase(OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_SUBPARTITION)) { return Integer.toString(this.mapperId); } return null; } private String columnNameToBindVariable(String columnName) { return ":" + columnName; } private void configurePreparedStatementColumns(PreparedStatement statement, Column[] columns, Object[] array) throws SQLException { String bindValueName; if (this.tableHasMapperRowNumberColumn) { bindValueName = columnNameToBindVariable(OracleJdbcConnectorConstants.COLUMN_NAME_EXPORT_MAPPER_ROW) .replaceFirst(":", ""); try { OracleQueries.setLongAtName(statement, bindValueName, this.mapperRowNumber); } catch (Exception e) { throw new RuntimeException(e); } this.mapperRowNumber++; } for (int i = 0; i < array.length; i++) { String colName = columns[i].getName(); bindValueName = columnNameToBindVariable(colName).replaceFirst(":", ""); OracleTableColumn oracleTableColumn = tableColumns.findColumnByName(colName); setBindValueAtName(statement, bindValueName, array[i], oracleTableColumn); } statement.addBatch(); } private void setBindValueAtName(PreparedStatement statement, String bindValueName, Object bindValue, OracleTableColumn column) throws SQLException { if (column.getOracleType() == OracleQueries.getOracleType("NUMBER")) { OracleQueries.setBigDecimalAtName(statement, bindValueName, (BigDecimal) bindValue); } else if (column.getOracleType() == OracleQueries.getOracleType("VARCHAR")) { OracleQueries.setStringAtName(statement, bindValueName, (String) bindValue); } else if (column.getOracleType() == OracleQueries.getOracleType("TIMESTAMP") || column.getOracleType() == OracleQueries.getOracleType("TIMESTAMPTZ") || column.getOracleType() == OracleQueries.getOracleType("TIMESTAMPLTZ")) { Object objValue = bindValue; if (objValue instanceof LocalDateTime) { //TODO: Improve date handling LocalDateTime value = (LocalDateTime) objValue; Timestamp timestampValue = new Timestamp(value.toDateTime().getMillis()); OracleQueries.setTimestampAtName(statement, bindValueName, timestampValue); } else { String value = (String) objValue; if (value == null || value.equalsIgnoreCase("null")) { value = ""; } OracleQueries.setStringAtName(statement, bindValueName, value); } } else if (column.getOracleType() == OracleQueries.getOracleType("BINARY_DOUBLE")) { Double value = (Double) bindValue; if (value != null) { OracleQueries.setBinaryDoubleAtName(statement, bindValueName, value); } else { OracleQueries.setObjectAtName(statement, bindValueName, null); } } else if (column.getOracleType() == OracleQueries.getOracleType("BINARY_FLOAT")) { Float value = (Float) bindValue; if (value != null) { OracleQueries.setBinaryFloatAtName(statement, bindValueName, value); } else { OracleQueries.setObjectAtName(statement, bindValueName, null); } } else if (column.getOracleType() == OracleQueries.getOracleType("STRUCT")) { // <- E.g. URITYPE if (column.getDataType().equals(OracleJdbcConnectorConstants.Oracle.URITYPE)) { String value = (String) bindValue; OracleQueries.setStringAtName(statement, bindValueName, value); } else { String msg = String.format( "%s needs to be updated to cope with the data-type: %s " + "where the Oracle data_type is \"%s\".", OracleUtilities.getCurrentMethodName(), column.getDataType(), column.getOracleType()); LOG.error(msg); throw new UnsupportedOperationException(msg); } } else { // LOB data-types are currently not supported during // a Sqoop Export. // JIRA: SQOOP-117 // OraOopConstants.SUPPORTED_EXPORT_ORACLE_DATA_TYPES_CLAUSE // will already have excluded all LOB columns. // case oracle.jdbc.OracleTypes.CLOB: // { // oracle.sql.CLOB clob = new // oracle.sql.CLOB(connection); // Object value = fieldMap.get(colName); // //clob.set // statement.setCLOBAtName(bindValueName, clob); // break; // } String msg = String.format("%s may need to be updated to cope with the data-type: %s", OracleUtilities.getCurrentMethodName(), column.getOracleType()); LOG.debug(msg); OracleQueries.setObjectAtName(statement, bindValueName, bindValue); } } private boolean canUseOracleAppendValuesHint() { // Should we use the APPEND_VALUES Oracle hint?... // (Yes, if this is Oracle 11.2 or above)... boolean result = oracleVersion.isGreaterThanOrEqualTo(11, 2, 0, 0); // If there is a BINARY_DOUBLE or BINARY_FLOAT column, then we'll avoid // using // the APPEND_VALUES hint. If there is a NULL in the HDFS file, then we'll // encounter // "ORA-12838: cannot read/modify an object after modifying it in parallel" // due to the JDBC driver issuing the INSERT statement twice to the database // without a COMMIT in between (as was observed via WireShark). // We're not sure why this happens - we just know how to avoid it. if (result) { boolean binaryDoubleColumnExists = false; boolean binaryFloatColumnExists = false; for (int idx = 0; idx < this.tableColumns.size(); idx++) { OracleTableColumn oracleTableColumn = this.tableColumns.get(idx); if (oracleTableColumn.getOracleType() == OracleQueries.getOracleType("BINARY_DOUBLE")) { binaryDoubleColumnExists = true; } if (oracleTableColumn.getOracleType() == OracleQueries.getOracleType("BINARY_FLOAT")) { binaryFloatColumnExists = true; } } if (binaryDoubleColumnExists || binaryFloatColumnExists) { result = false; LOG.info("The APPEND_VALUES Oracle hint will not be used for the " + "INSERT SQL statement, as the Oracle table " + "contains either a BINARY_DOUBLE or BINARY_FLOAT column."); } } return result; } protected boolean allowUserToOverrideUseOfTheOracleAppendValuesHint(ToJobConfig jobConfig, boolean useAppendValuesOracleHint) { boolean result = useAppendValuesOracleHint; // Has the user forced the use of APPEND_VALUES either on or off?... switch (OracleUtilities.getOracleAppendValuesHintUsage(jobConfig)) { case OFF: result = false; LOG.debug(String.format( "Use of the APPEND_VALUES Oracle hint has been forced OFF. " + "(It was %s to used).", useAppendValuesOracleHint ? "going" : "not going")); break; case ON: result = true; LOG.debug(String.format( "Use of the APPEND_VALUES Oracle hint has been forced ON. " + "(It was %s to used).", useAppendValuesOracleHint ? "going" : "not going")); break; case AUTO: LOG.debug(String.format("The APPEND_VALUES Oracle hint %s be used.", result ? "will" : "will not")); break; default: throw new RuntimeException("Invalid value for APPEND_VALUES."); } return result; } }