Java tutorial
/* * Copyright (C) 2016 Stratio (http://stratio.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.sqoop.connector.jdbc.oracle; import java.io.Serializable; import java.sql.Connection; import java.sql.SQLException; import java.util.Date; import java.util.List; import org.apache.commons.lang.BooleanUtils; import org.apache.log4j.Logger; import org.apache.sqoop.common.MutableContext; import org.apache.sqoop.connector.jdbc.oracle.configuration.ConnectionConfig; import org.apache.sqoop.connector.jdbc.oracle.configuration.LinkConfiguration; import org.apache.sqoop.connector.jdbc.oracle.configuration.ToJobConfig; import org.apache.sqoop.connector.jdbc.oracle.configuration.ToJobConfiguration; import org.apache.sqoop.connector.jdbc.oracle.util.OracleQueries; import org.apache.sqoop.connector.jdbc.oracle.util.OracleTable; import org.apache.sqoop.connector.jdbc.oracle.util.OracleTablePartition; import org.apache.sqoop.connector.jdbc.oracle.util.OracleTablePartitions; import org.apache.sqoop.connector.jdbc.oracle.util.OracleUtilities; import org.apache.sqoop.connector.jdbc.oracle.util.OracleUtilities.UpdateMode; import org.apache.sqoop.job.etl.InitializerContext; public class OracleJdbcToInitializer extends OracleJdbcCommonInitializer<ToJobConfiguration> implements Serializable { private static final Logger LOG = Logger.getLogger(OracleJdbcToInitializer.class); @Override public void connect(InitializerContext context, LinkConfiguration linkConfiguration, ToJobConfiguration jobConfiguration) throws SQLException { super.connect(context, linkConfiguration, jobConfiguration); table = OracleUtilities.decodeOracleTableName(linkConfiguration.connectionConfig.username, jobConfiguration.toJobConfig.tableName); } @Override public void initialize(InitializerContext context, LinkConfiguration linkConfiguration, ToJobConfiguration jobConfiguration) { super.initialize(context, linkConfiguration, jobConfiguration); LOG.debug("Running Oracle JDBC connector initializer"); try { createAnyRequiredOracleObjects(context.getContext(), jobConfiguration.toJobConfig, linkConfiguration.connectionConfig); if (!isSqoopTableAnOracleTable(connection, linkConfiguration.connectionConfig.username, table)) { throw new RuntimeException("Can only load data into Oracle tables."); } } catch (SQLException ex) { throw new RuntimeException(ex); } } private void createAnyRequiredOracleObjects(MutableContext context, ToJobConfig jobConfig, ConnectionConfig connectionConfig) throws SQLException { // The SYSDATE on the Oracle database will be used as the partition value // for this export job... Object sysDateTime = OracleQueries.getSysDate(connection); String sysDateStr = OracleQueries.oraDATEToString(sysDateTime, "yyyy-mm-dd hh24:mi:ss"); context.setString(OracleJdbcConnectorConstants.SQOOP_ORACLE_JOB_SYSDATE, sysDateStr); checkForOldOraOopTemporaryOracleTables(connection, sysDateTime, OracleQueries.getCurrentSchema(connection)); // Store the actual partition value, so the N mappers know what value to // insert... String partitionValue = OracleQueries.oraDATEToString(sysDateTime, OracleJdbcConnectorConstants.ORAOOP_EXPORT_PARTITION_DATE_FORMAT); context.setString(OracleJdbcConnectorConstants.ORAOOP_EXPORT_PARTITION_DATE_VALUE, partitionValue); // Generate the (22 character) partition name... String partitionName = OracleUtilities.createExportTablePartitionNameFromOracleTimestamp(sysDateTime); //TODO: Number of mappers needs to be fixed int numMappers = 8; String exportTableTemplate = jobConfig.templateTable; if (exportTableTemplate == null) { exportTableTemplate = ""; } String user = connectionConfig.username; //TODO: This is from the other Oracle Manager //if (user == null) { // user = OracleManager.getSessionUser(connection); //} OracleTable templateTableContext = OracleUtilities.decodeOracleTableName(user, exportTableTemplate); boolean noLoggingOnNewTable = BooleanUtils.isTrue(jobConfig.nologging); List<String> updateKeyCol = jobConfig.updateKey; /* =========================== */ /* VALIDATION OF INPUTS */ /* =========================== */ if (updateKeyCol == null || updateKeyCol.isEmpty()) { // We're performing an "insert" export, not an "update" export. // Check that the "oraoop.export.merge" property has not been specified, // as this would be // an invalid scenario... if (OracleUtilities.getExportUpdateMode(jobConfig) == UpdateMode.Merge) { throw new RuntimeException("The merge option can only be used if " + "an update key is specified."); } } if (OracleUtilities.userWantsToCreatePartitionedExportTableFromTemplate(jobConfig) || OracleUtilities.userWantsToCreateNonPartitionedExportTableFromTemplate(jobConfig)) { // OraOop will create the export table. if (table.getName().length() > OracleJdbcConnectorConstants.Oracle.MAX_IDENTIFIER_LENGTH) { String msg = String.format( "The Oracle table name \"%s\" is longer than %d characters.\n" + "Oracle will not allow a table with this name to be created.", table.getName(), OracleJdbcConnectorConstants.Oracle.MAX_IDENTIFIER_LENGTH); throw new RuntimeException(msg); } if (updateKeyCol != null && !updateKeyCol.isEmpty()) { // We're performing an "update" export, not an "insert" export. // Check whether the user is attempting an "update" (i.e. a non-merge). // If so, they're // asking to only UPDATE rows in a (about to be created) (empty) table // that contains no rows. // This will be a waste of time, as we'd be attempting to perform UPDATE // operations against a // table with no rows in it... UpdateMode updateMode = OracleUtilities.getExportUpdateMode(jobConfig); if (updateMode == UpdateMode.Update) { throw new RuntimeException( String.format("\n\nCombining the template table option with the merge " + "option is nonsensical, as this would create an " + "empty table and then perform " + "a lot of work that results in a table containing no rows.\n")); } } // Check that the specified template table actually exists and is a // table... String templateTableObjectType = OracleQueries.getOracleObjectType(connection, templateTableContext); if (templateTableObjectType == null) { throw new RuntimeException( String.format("The specified Oracle template table \"%s\" does not exist.", templateTableContext.toString())); } if (!templateTableObjectType.equalsIgnoreCase(OracleJdbcConnectorConstants.Oracle.OBJECT_TYPE_TABLE)) { throw new RuntimeException(String.format( "The specified Oracle template table \"%s\" is not an " + "Oracle table, it's a %s.", templateTableContext.toString(), templateTableObjectType)); } if (BooleanUtils.isTrue(jobConfig.dropTableIfExists)) { OracleQueries.dropTable(connection, table); } // Check that there is no existing database object with the same name of // the table to be created... String newTableObjectType = OracleQueries.getOracleObjectType(connection, table); if (newTableObjectType != null) { throw new RuntimeException(String.format( "%s cannot create a new Oracle table named %s as a \"%s\" " + "with this name already exists.", OracleJdbcConnectorConstants.CONNECTOR_NAME, table.toString(), newTableObjectType)); } } else { // The export table already exists. if (updateKeyCol != null && !updateKeyCol.isEmpty()) { // We're performing an "update" export, not an "insert" export. // Check that there exists an index on the export table on the // update-key column(s). // Without such an index, this export may perform like a real dog... String[] updateKeyColumns = OracleUtilities.getExportUpdateKeyColumnNames(jobConfig); if (!OracleQueries.doesIndexOnColumnsExist(connection, table, updateKeyColumns)) { String msg = String.format( "\n**************************************************************" + "***************************************************************" + "\n\tThe table %1$s does not have a valid index on " + "the column(s) %2$s.\n" + "\tAs a consequence, this export may take a long time to " + "complete.\n" + "\tIf performance is unacceptable, consider reattempting this " + "job after creating an index " + "on this table via the SQL...\n" + "\t\tcreate index <index_name> on %1$s(%2$s);\n" + "****************************************************************" + "*************************************************************", table.toString(), OracleUtilities.stringArrayToCSV(updateKeyColumns)); LOG.warn(msg); } } } boolean createMapperTables = false; if (updateKeyCol != null && !updateKeyCol.isEmpty()) { createMapperTables = true; } if (OracleUtilities.userWantsToCreatePartitionedExportTableFromTemplate(jobConfig)) { /* ================================= */ /* CREATE A PARTITIONED TABLE */ /* ================================= */ // Create a new Oracle table using the specified template... String[] subPartitionNames = OracleUtilities.generateExportTableSubPartitionNames(numMappers, sysDateTime); // Create the export table from a template table... String tableStorageClause = OracleUtilities.getExportTableStorageClause(jobConfig); OracleQueries.createExportTableFromTemplateWithPartitioning(connection, table, tableStorageClause, templateTableContext, noLoggingOnNewTable, partitionName, sysDateTime, numMappers, subPartitionNames); createMapperTables = true; } else if (OracleUtilities.userWantsToCreateNonPartitionedExportTableFromTemplate(jobConfig)) { /* ===================================== */ /* CREATE A NON-PARTITIONED TABLE */ /* ===================================== */ String tableStorageClause = OracleUtilities.getExportTableStorageClause(jobConfig); OracleQueries.createExportTableFromTemplate(connection, table, tableStorageClause, templateTableContext, noLoggingOnNewTable); } else { /* ===================================================== */ /* ADD ADDITIONAL PARTITIONS TO AN EXISTING TABLE */ /* ===================================================== */ // If the export table is partitioned, and the partitions were created by // OraOop, then we need // create additional partitions... OracleTablePartitions tablePartitions = OracleQueries.getPartitions(connection, table); // Find any partition name starting with "ORAOOP_"... OracleTablePartition oraOopPartition = tablePartitions .findPartitionByRegEx("^" + OracleJdbcConnectorConstants.EXPORT_TABLE_PARTITION_NAME_PREFIX); if (tablePartitions.size() > 0 && oraOopPartition == null) { for (int idx = 0; idx < tablePartitions.size(); idx++) { LOG.info(String.format("The Oracle table %s has a partition named \"%s\".", table.toString(), tablePartitions.get(idx).getName())); } LOG.warn(String.format( "The Oracle table %s is partitioned.\n" + "These partitions were not created by %s.", table.toString(), OracleJdbcConnectorConstants.CONNECTOR_NAME)); } if (oraOopPartition != null) { // Indicate in the configuration what's happening... context.setBoolean(OracleJdbcConnectorConstants.EXPORT_TABLE_HAS_SQOOP_PARTITIONS, true); LOG.info(String.format( "The Oracle table %s is partitioned.\n" + "These partitions were created by %s, so " + "additional partitions will now be created.\n" + "The name of the new partition will be \"%s\".", table.toString(), OracleJdbcConnectorConstants.CONNECTOR_NAME, partitionName)); String[] subPartitionNames = OracleUtilities.generateExportTableSubPartitionNames(numMappers, sysDateTime); // Add another partition (and N subpartitions) to this existing, // partitioned export table... OracleQueries.createMoreExportTablePartitions(connection, table, partitionName, sysDateTime, subPartitionNames); createMapperTables = true; } } if (createMapperTables) { createUniqueMapperTable(sysDateTime, numMappers, jobConfig); } } private void createUniqueMapperTable(Object sysDateTime, int numMappers, ToJobConfig jobConfig) throws SQLException { // Mappers insert data into a unique table before either: // - exchanging it into a subpartition of the 'real' export table; or // - merging it into the 'real' export table. for (int i = 0; i < numMappers; i++) { OracleTable mapperTable = OracleUtilities.generateExportTableMapperTableName(i, sysDateTime, null); // If this mapper is being reattempted in response to a failure, we need // to delete the // temporary table created by the previous attempt... OracleQueries.dropTable(connection, mapperTable); String temporaryTableStorageClause = OracleUtilities.getTemporaryTableStorageClause(jobConfig); OracleQueries.createExportTableForMapper(connection, mapperTable, temporaryTableStorageClause, table, false); // <- addOraOopPartitionColumns LOG.debug(String.format("Created temporary mapper table %s", mapperTable.toString())); } } private void checkForOldOraOopTemporaryOracleTables(Connection connection, Object sysDateTime, String schema) { try { StringBuilder message = new StringBuilder(); message.append(String.format("The following tables appear to be old temporary tables created by " + "%s that have not been deleted.\n" + "They are probably left over from jobs that encountered an error and " + "could not clean up after themselves.\n" + "You might want to drop these Oracle tables in order to reclaim " + "Oracle storage space:\n", OracleJdbcConnectorConstants.CONNECTOR_NAME)); boolean showMessage = false; String generatedTableName = OracleUtilities.generateExportTableMapperTableName(0, sysDateTime, schema) .getName(); generatedTableName = generatedTableName.replaceAll("[0-9]", "%"); generatedTableName = OracleUtilities.replaceAll(generatedTableName, "%%", "%"); Date sysDate = OracleQueries.oraDATEToDate(sysDateTime); List<OracleTable> tables = OracleQueries.getTablesWithTableNameLike(connection, schema, generatedTableName); for (OracleTable oracleTable : tables) { OracleUtilities.DecodedExportMapperTableName tableName = OracleUtilities .decodeExportTableMapperTableName(oracleTable); if (tableName != null) { Date tableDate = OracleQueries.oraDATEToDate(tableName.getTableDateTime()); double daysApart = (sysDate.getTime() - tableDate.getTime()) / (1000 * 60 * 60 * 24); if (daysApart > 1.0) { showMessage = true; message.append(String.format("\t%s\n", oracleTable.toString())); } } } if (showMessage) { LOG.info(message.toString()); } } catch (Exception ex) { LOG.warn(String.format("%s was unable to check for the existance of old " + "temporary Oracle tables.\n" + "Error:\n%s", OracleJdbcConnectorConstants.CONNECTOR_NAME, ex.toString())); } } private boolean isSqoopTableAnOracleTable(Connection connection, String connectionUserName, OracleTable tableContext) { String oracleObjectType; try { // Find the table via dba_tables... OracleTable oracleTable = OracleQueries.getTable(connection, tableContext.getSchema(), tableContext.getName()); if (oracleTable != null) { return true; } // If we could not find the table via dba_tables, then try and determine // what type of database object the // user was referring to. Perhaps they've specified the name of a view?... oracleObjectType = OracleQueries.getOracleObjectType(connection, tableContext); if (oracleObjectType == null) { LOG.info(String.format( "%1$s will not process this Sqoop connection, " + "as the Oracle user %2$s does not own a table named %3$s.\n" + "\tPlease prefix the table name with the owner.\n " + "\tNote: You may need to double-quote the owner and/or table name." + "\n\tE.g. sqoop ... --username %4$s --table %2$s.%3$s\n", OracleJdbcConnectorConstants.CONNECTOR_NAME, tableContext.getSchema(), tableContext.getName(), connectionUserName)); return false; } } catch (SQLException ex) { LOG.warn(String.format( "Unable to determine the Oracle-type of the object named %s owned by " + "%s.\nError:\n" + "%s", tableContext.getName(), tableContext.getSchema(), ex.getMessage())); // In the absence of conflicting information, let's assume the object is // actually a table... return true; } boolean result = oracleObjectType.equalsIgnoreCase(OracleJdbcConnectorConstants.Oracle.OBJECT_TYPE_TABLE); if (!result) { LOG.info(String.format( "%s will not process this sqoop connection, " + "as %s is not an Oracle table, it's a %s.", OracleJdbcConnectorConstants.CONNECTOR_NAME, tableContext.toString(), oracleObjectType)); } return result; } @Override protected List<String> getColumnNames(ToJobConfiguration jobConfiguration) throws SQLException { List<String> colNames = OracleQueries.getToTableColumnNames(connection, table, true, true); return OracleUtilities.getSelectedColumnNamesInOracleTable(table, colNames, jobConfiguration.toJobConfig.columns); } }