List of usage examples for org.apache.commons.collections4 MultiValuedMap containsKey
boolean containsKey(Object key);
From source file: org.finra.herd.service.helper.Hive13DdlGenerator.java
/**
 * Adds the relative "alter table add partition" statements for each storage unit entity. Please note that each request partition value might result in
 * multiple available storage unit entities (subpartitions).
 *
 * @param generateDdlRequest the DDL generation request parameters (table name, flags, caches of storage entities and S3 bucket names, custom DDL entity)
 * @param sb the string builder to be updated with the "alter table add partition" statements
 * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values
 * @param businessObjectFormatForSchema the business object format to be used for schema
 * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option
 * @param storageUnitAvailabilityDtos the list of storage unit availability DTOs
 */
private void processStorageUnitsForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb,
    HashMap<String, String> replacements, BusinessObjectFormat businessObjectFormatForSchema, String ifNotExistsOption,
    List<StorageUnitAvailabilityDto> storageUnitAvailabilityDtos)
{
    // If flag is not set to suppress scan for unregistered sub-partitions, retrieve all storage
    // file paths for the relative storage units loaded in a multi-valued map for easy access.
    // When the scan is suppressed, an empty map is used instead so no storage file lookup is performed.
    MultiValuedMap<Integer, String> storageUnitIdToStorageFilePathsMap = BooleanUtils.isTrue(
        generateDdlRequest.suppressScanForUnregisteredSubPartitions) ? new ArrayListValuedHashMap<>()
            : storageFileDao.getStorageFilePathsByStorageUnitIds(
                storageUnitHelper.getStorageUnitIds(storageUnitAvailabilityDtos));

    // Create a map of storage names in upper case to their relative S3 key prefix velocity templates.
    // Acts as a per-call cache so each storage's template is resolved at most once.
    Map<String, String> s3KeyPrefixVelocityTemplates = new HashMap<>();

    // Create a map of business object format keys to their relative business object format instances.
    // Acts as a per-call cache so each format is retrieved at most once.
    Map<BusinessObjectFormatKey, BusinessObjectFormat> businessObjectFormats = new HashMap<>();

    // Get data provider for the business object definition.
    BusinessObjectDefinitionEntity businessObjectDefinitionEntity = businessObjectDefinitionDaoHelper
        .getBusinessObjectDefinitionEntity(
            new BusinessObjectDefinitionKey(businessObjectFormatForSchema.getNamespace(),
                businessObjectFormatForSchema.getBusinessObjectDefinitionName()));
    String dataProviderName = businessObjectDefinitionEntity.getDataProvider().getName();

    // Generate the beginning of the alter table statement.
    // trim() removes the trailing space left when ifNotExistsOption is an empty string.
    String alterTableFirstToken = String
        .format("ALTER TABLE `%s` ADD %s", generateDdlRequest.tableName, ifNotExistsOption).trim();

    // Process all available business object data instances.
    List<String> addPartitionStatements = new ArrayList<>();
    for (StorageUnitAvailabilityDto storageUnitAvailabilityDto : storageUnitAvailabilityDtos)
    {
        // Get storage name in upper case for this storage unit.
        String upperCaseStorageName = storageUnitAvailabilityDto.getStorageName().toUpperCase();

        // Get storage entity for this storage unit.
        StorageEntity storageEntity = getStorageEntity(upperCaseStorageName, generateDdlRequest.storageEntities);

        // Get business object data key for this business object data.
        BusinessObjectDataKey businessObjectDataKey = storageUnitAvailabilityDto.getBusinessObjectDataKey();

        // Get business object format key for this business object data.
        BusinessObjectFormatKey businessObjectFormatKey = businessObjectFormatHelper
            .getBusinessObjectFormatKey(businessObjectDataKey);

        // Retrieve s3 key prefix velocity template for this storage.
        String s3KeyPrefixVelocityTemplate = getS3KeyPrefixVelocityTemplate(upperCaseStorageName, storageEntity,
            s3KeyPrefixVelocityTemplates);

        // Retrieve business object format for this business object data.
        BusinessObjectFormat businessObjectFormat = getBusinessObjectFormat(businessObjectFormatKey,
            businessObjectFormats);

        // Build the expected S3 key prefix for this storage unit.
        String s3KeyPrefix = s3KeyPrefixHelper.buildS3KeyPrefix(s3KeyPrefixVelocityTemplate, dataProviderName,
            businessObjectFormat, businessObjectDataKey, storageUnitAvailabilityDto.getStorageName());

        // If flag is set to suppress scan for unregistered sub-partitions, use the directory path or the S3 key prefix
        // as the partition's location, otherwise, use storage files to discover all unregistered sub-partitions.
        Collection<String> storageFilePaths = new ArrayList<>();
        if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions))
        {
            // Validate the directory path value if it is present.
            if (storageUnitAvailabilityDto.getStorageUnitDirectoryPath() != null)
            {
                Assert.isTrue(storageUnitAvailabilityDto.getStorageUnitDirectoryPath().equals(s3KeyPrefix),
                    String.format(
                        "Storage directory path \"%s\" registered with business object data {%s} "
                            + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".",
                        storageUnitAvailabilityDto.getStorageUnitDirectoryPath(),
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey),
                        storageUnitAvailabilityDto.getStorageName(), s3KeyPrefix));
            }

            // Add the S3 key prefix to the list of storage files.
            // We add a trailing '/' character to the prefix, since it represents a directory.
            storageFilePaths.add(StringUtils.appendIfMissing(s3KeyPrefix, "/"));
        }
        else
        {
            // Retrieve storage file paths registered with this business object data in the specified storage.
            // NOTE(review): the containsKey check before get() appears deliberate — it keeps storageFilePaths as a
            // fresh mutable ArrayList when the key is absent, instead of the collection returned by the multi-valued
            // map, which the add() below would otherwise touch. Confirm against MultiValuedMap.get semantics before
            // simplifying this to a bare get().
            storageFilePaths = storageUnitIdToStorageFilePathsMap
                .containsKey(storageUnitAvailabilityDto.getStorageUnitId()) ? storageUnitIdToStorageFilePathsMap
                    .get(storageUnitAvailabilityDto.getStorageUnitId()) : new ArrayList<>();

            // Validate storage file paths registered with this business object data in the specified storage.
            // The validation check below is required even if we have no storage files registered.
            storageFileHelper.validateStorageFilePaths(storageFilePaths, s3KeyPrefix, businessObjectDataKey,
                storageUnitAvailabilityDto.getStorageName());

            // If there are no storage files registered for this storage unit, we should use the storage directory path value.
            if (storageFilePaths.isEmpty())
            {
                // Validate that directory path value is present and it matches the S3 key prefix.
                // NOTE(review): this branch checks startsWith(s3KeyPrefix) while the suppress-scan branch above
                // requires exact equals(s3KeyPrefix); the error message text is the same in both.
                Assert.isTrue(storageUnitAvailabilityDto.getStorageUnitDirectoryPath() != null
                    && storageUnitAvailabilityDto.getStorageUnitDirectoryPath().startsWith(s3KeyPrefix),
                    String.format(
                        "Storage directory path \"%s\" registered with business object data {%s} "
                            + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".",
                        storageUnitAvailabilityDto.getStorageUnitDirectoryPath(),
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey),
                        storageUnitAvailabilityDto.getStorageName(), s3KeyPrefix));

                // Add storage directory path to the empty storage files list.
                // We add a trailing '/' character to the path, since it represents a directory.
                storageFilePaths.add(storageUnitAvailabilityDto.getStorageUnitDirectoryPath() + "/");
            }
        }

        // Retrieve the s3 bucket name.
        String s3BucketName = getS3BucketName(upperCaseStorageName, storageEntity, generateDdlRequest.s3BucketNames);

        // For partitioned table, add the relative partitions to the generated DDL.
        if (generateDdlRequest.isPartitioned)
        {
            // If flag is set to suppress scan for unregistered sub-partitions, validate that the number of primary and sub-partition values specified for
            // the business object data equals to the number of partition columns defined in schema for the format selected for DDL generation.
            if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions))
            {
                // One primary partition value plus however many sub-partition values are registered.
                int businessObjectDataRegisteredPartitions = 1
                    + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues());
                Assert.isTrue(
                    businessObjectFormatForSchema.getSchema().getPartitions()
                        .size() == businessObjectDataRegisteredPartitions,
                    String.format(
                        "Number of primary and sub-partition values (%d) specified for the business object data is not equal to "
                            + "the number of partition columns (%d) defined in the schema of the business object format selected for DDL generation. "
                            + "Business object data: {%s}, business object format: {%s}",
                        businessObjectDataRegisteredPartitions,
                        businessObjectFormatForSchema.getSchema().getPartitions().size(),
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey),
                        businessObjectFormatHelper
                            .businessObjectFormatKeyToString(businessObjectFormatHelper
                                .getBusinessObjectFormatKey(businessObjectFormatForSchema))));
            }
            // Otherwise, since the format version selected for DDL generation might not match the relative business object format version that business
            // object data is registered against, validate that the number of sub-partition values specified for the business object data is less than
            // the number of partition columns defined in schema for the format selected for DDL generation.
            else
            {
                Assert.isTrue(
                    businessObjectFormatForSchema.getSchema().getPartitions().size() > CollectionUtils
                        .size(businessObjectDataKey.getSubPartitionValues()),
                    String.format(
                        "Number of subpartition values specified for the business object data is greater than or equal to "
                            + "the number of partition columns defined in the schema of the business object format selected for DDL generation. "
                            + "Business object data: {%s}, business object format: {%s}",
                        businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey),
                        businessObjectFormatHelper
                            .businessObjectFormatKeyToString(businessObjectFormatHelper
                                .getBusinessObjectFormatKey(businessObjectFormatForSchema))));
            }

            // Get partition information. For multiple level partitioning, auto-discover subpartitions (subdirectories) not already included into the S3 key
            // prefix. Each discovered partition requires a standalone "add partition" clause. Please note that due to the above validation check, there
            // should be no auto discoverable sub-partition columns, when flag is set to suppress scan for unregistered sub-partitions.
            List<SchemaColumn> autoDiscoverableSubPartitionColumns = businessObjectFormatForSchema.getSchema()
                .getPartitions()
                .subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()),
                    businessObjectFormatForSchema.getSchema().getPartitions().size());

            // Get and process Hive partitions.
            for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey,
                autoDiscoverableSubPartitionColumns, s3KeyPrefix, storageFilePaths,
                storageUnitAvailabilityDto.getStorageName()))
            {
                // Build an add partition statement for this hive partition.
                // When multiple partitions are combined in a single ALTER TABLE, each clause is indented
                // instead of repeating the "ALTER TABLE ... ADD" prefix.
                StringBuilder addPartitionStatement = new StringBuilder();
                addPartitionStatement.append(String.format("%s PARTITION (",
                    BooleanUtils.isTrue(generateDdlRequest.combineMultiplePartitionsInSingleAlterTable) ? " "
                        : alterTableFirstToken));

                // Specify all partition column values.
                List<String> partitionKeyValuePairs = new ArrayList<>();
                for (int i = 0; i < businessObjectFormatForSchema.getSchema().getPartitions().size(); i++)
                {
                    String partitionColumnName = businessObjectFormatForSchema.getSchema().getPartitions()
                        .get(i).getName();
                    String partitionValue = hivePartition.getPartitionValues().get(i);
                    partitionKeyValuePairs.add(String.format("`%s`='%s'", partitionColumnName, partitionValue));
                }
                addPartitionStatement.append(StringUtils.join(partitionKeyValuePairs, ", "));
                addPartitionStatement.append(String.format(") LOCATION 's3n://%s/%s%s'", s3BucketName, s3KeyPrefix,
                    StringUtils.isNotBlank(hivePartition.getPath()) ? hivePartition.getPath() : ""));

                // Add this add partition statement to the list.
                addPartitionStatements.add(addPartitionStatement.toString());
            }
        }
        else // This is a non-partitioned table.
        {
            // Get location for this non-partitioned table.
            String tableLocation = String.format("s3n://%s/%s", s3BucketName, s3KeyPrefix);

            if (generateDdlRequest.customDdlEntity == null)
            {
                // Since custom DDL was not specified and this table is not partitioned, add a LOCATION clause.
                // This is the last line in the non-partitioned table DDL.
                sb.append(String.format("LOCATION '%s';", tableLocation));
            }
            else
            {
                // Since custom DDL was used for a non-partitioned table, substitute the relative custom DDL token with the actual table location.
                replacements.put(NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN, tableLocation);
            }
        }
    }

    // Add all add partition statements to the main string builder.
    if (CollectionUtils.isNotEmpty(addPartitionStatements))
    {
        // If specified, combine adding multiple partitions in a single ALTER TABLE statement.
        if (BooleanUtils.isTrue(generateDdlRequest.combineMultiplePartitionsInSingleAlterTable))
        {
            sb.append(alterTableFirstToken).append('\n');
        }

        // Combined statements are comma-separated clauses of one ALTER TABLE; otherwise each
        // statement is terminated with a semicolon.
        sb.append(StringUtils.join(addPartitionStatements,
            BooleanUtils.isTrue(generateDdlRequest.combineMultiplePartitionsInSingleAlterTable) ? ",\n" : ";\n"))
            .append(";\n");
    }
}