|
| 1 | +/* |
| 2 | + * Copyright (2021) The Delta Lake Project Authors. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package org.apache.spark.sql.delta.columnmapping |
| 18 | + |
| 19 | +import java.util.concurrent.TimeUnit |
| 20 | + |
| 21 | +import org.apache.spark.sql.delta._ |
| 22 | +import org.apache.spark.sql.delta.DeltaConfigs._ |
| 23 | +import org.apache.spark.sql.delta.sources.DeltaSQLConf._ |
| 24 | + |
| 25 | +import org.apache.spark.sql.catalyst.TableIdentifier |
| 26 | +import org.apache.spark.util.ManualClock |
| 27 | + |
/**
 * Tests dropping the column mapping table feature from a Delta table with
 * `ALTER TABLE ... DROP FEATURE`.
 *
 * The suite covers the rejection paths (removal disabled by config, feature not present in the
 * protocol, historical versions still present) as well as the two-step success path: a first
 * `DROP FEATURE` that rewrites the table but asks the caller to wait for the retention period,
 * followed by `DROP FEATURE ... TRUNCATE HISTORY` once the (manual) clock has been advanced.
 */
class DropColumnMappingFeatureSuite extends RemoveColumnMappingSuiteUtils {

  /** Manually advanced clock handed to the [[DeltaLog]] used by the truncate-history step. */
  val clock: ManualClock = new ManualClock(System.currentTimeMillis())

  test("column mapping cannot be dropped without the feature flag") {
    withSQLConf(ALLOW_COLUMN_MAPPING_REMOVAL.key -> "false") {
      sql(s"""CREATE TABLE $testTableName
             |USING delta
             |TBLPROPERTIES ('${DeltaConfigs.COLUMN_MAPPING_MODE.key}' = 'name',
             |               'delta.minReaderVersion' = '3',
             |               'delta.minWriterVersion' = '7')
             |AS SELECT 1 as a
             |""".stripMargin)

      // With removal disabled, the drop must be rejected.
      intercept[DeltaColumnMappingUnsupportedException] {
        dropColumnMappingTableFeature()
      }
    }
  }

  test("table without column mapping enabled") {
    sql(s"""CREATE TABLE $testTableName
           |USING delta
           |TBLPROPERTIES ('${DeltaConfigs.COLUMN_MAPPING_MODE.key}' = 'none')
           |AS SELECT 1 as a
           |""".stripMargin)

    val e = intercept[DeltaTableFeatureException] {
      dropColumnMappingTableFeature()
    }
    // The "." argument is only a placeholder: the exception is built solely to read its error
    // class, while the expected feature name is checked through `parameters`.
    checkError(e,
      errorClass = DeltaErrors.dropTableFeatureFeatureNotSupportedByProtocol(".")
        .getErrorClass,
      parameters = Map("feature" -> "columnMapping"))
  }

  test("invalid column names") {
    // NOTE(review): `colName` is defined outside this file; presumably it yields names that are
    // only valid while column mapping is enabled -- confirm against the helper.
    val invalidColName1 = colName("col1")
    val invalidColName2 = colName("col2")
    sql(
      s"""CREATE TABLE $testTableName (a INT, `$invalidColName1` INT, `$invalidColName2` INT)
         |USING delta
         |TBLPROPERTIES ('delta.columnMapping.mode' = 'name')
         |""".stripMargin)
    // Only the exception type is checked here, so the result is intentionally not bound.
    intercept[DeltaTableFeatureException] {
      dropColumnMappingTableFeature()
    }
  }

  test("drop column mapping from a table without table feature") {
    sql(
      s"""CREATE TABLE $testTableName
         |USING delta
         |TBLPROPERTIES ('${DeltaConfigs.COLUMN_MAPPING_MODE.key}' = 'name',
         |               '${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key}' = 'false',
         |               'delta.minReaderVersion' = '1',
         |               'delta.minWriterVersion' = '1')
         |AS SELECT id as $logicalColumnName, id + 1 as $secondColumn
         |  FROM RANGE(0, $totalRows, 1, $numFiles)
         |""".stripMargin)
    val e = intercept[DeltaTableFeatureException] {
      dropColumnMappingTableFeature()
    }
    checkError(e,
      errorClass = "DELTA_FEATURE_DROP_FEATURE_NOT_PRESENT",
      parameters = Map("feature" -> "columnMapping"))
  }

  test("drop column mapping from a table with table feature") {
    sql(
      s"""CREATE TABLE $testTableName
         |USING delta
         |TBLPROPERTIES ('${DeltaConfigs.COLUMN_MAPPING_MODE.key}' = 'name',
         |               '${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key}' = 'false',
         |               'delta.minReaderVersion' = '3',
         |               'delta.minWriterVersion' = '7')
         |AS SELECT id as $logicalColumnName, id + 1 as $secondColumn
         |  FROM RANGE(0, $totalRows, 1, $numFiles)
         |""".stripMargin)
    testDroppingColumnMapping()
  }

  test("drop column mapping from a table without column mapping table property") {
    sql(
      s"""CREATE TABLE $testTableName
         |USING delta
         |TBLPROPERTIES ('${DeltaConfigs.COLUMN_MAPPING_MODE.key}' = 'name',
         |               '${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key}' = 'false',
         |               'delta.minReaderVersion' = '3',
         |               'delta.minWriterVersion' = '7')
         |AS SELECT id as $logicalColumnName, id + 1 as $secondColumn
         |  FROM RANGE(0, $totalRows, 1, $numFiles)
         |""".stripMargin)
    // Remove the table property first, then attempt to drop the feature itself.
    unsetColumnMappingProperty(useUnset = true)
    val e = intercept[DeltaTableFeatureException] {
      dropColumnMappingTableFeature()
    }
    checkError(
      e,
      errorClass = "DELTA_FEATURE_DROP_HISTORICAL_VERSIONS_EXIST",
      parameters = Map(
        "feature" -> "columnMapping",
        "logRetentionPeriodKey" -> "delta.logRetentionDuration",
        "logRetentionPeriod" -> "30 days",
        "truncateHistoryLogRetentionPeriod" -> "24 hours")
    )
  }

  test("drop column mapping in id mode") {
    sql(
      s"""CREATE TABLE $testTableName
         |USING delta
         |TBLPROPERTIES ('${DeltaConfigs.COLUMN_MAPPING_MODE.key}' = 'id',
         |               '${DeltaConfigs.ENABLE_DELETION_VECTORS_CREATION.key}' = 'false',
         |               'delta.minReaderVersion' = '3',
         |               'delta.minWriterVersion' = '7')
         |AS SELECT id as $logicalColumnName, id + 1 as $secondColumn
         |  FROM RANGE(0, $totalRows, 1, $numFiles)
         |""".stripMargin)
    testDroppingColumnMapping()
  }

  /**
   * Runs the full drop flow against the already-created test table:
   *  1. captures the current data and files and adds a column comment,
   *  2. issues `DROP FEATURE`, which is expected to fail with
   *     `DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD`,
   *  3. checks the resulting table state through `verifyRewrite`,
   *  4. checks the column comment is still present, and
   *  5. completes the drop with `TRUNCATE HISTORY`.
   */
  def testDroppingColumnMapping(): Unit = {
    // Capture the data before the drop so it can be compared after the rewrite.
    val originalData = spark.table(tableName = testTableName).select(logicalColumnName).collect()
    // Add a schema comment and verify it is preserved after the rewrite.
    val comment = "test comment"
    sql(s"ALTER TABLE $testTableName ALTER COLUMN $logicalColumnName COMMENT '$comment'")

    val deltaLog = DeltaLog.forTable(spark, TableIdentifier(tableName = testTableName))
    val originalSnapshot = deltaLog.update()

    // `contains` turns a missing comment into an assertion failure rather than a
    // NoSuchElementException from `Option.get`.
    assert(originalSnapshot.schema.head.getComment().contains(comment),
      "Column comment should be set before dropping column mapping.")
    // Files and starting version are taken from the same snapshot so they are consistent.
    val originalFiles = getFiles(originalSnapshot)
    val startingVersion = originalSnapshot.version

    val e = intercept[DeltaTableFeatureException] {
      dropColumnMappingTableFeature()
    }
    checkError(
      e,
      errorClass = "DELTA_FEATURE_DROP_WAIT_FOR_RETENTION_PERIOD",
      parameters = Map(
        "feature" -> "columnMapping",
        "logRetentionPeriodKey" -> "delta.logRetentionDuration",
        "logRetentionPeriod" -> "30 days",
        "truncateHistoryLogRetentionPeriod" -> "24 hours")
    )

    verifyRewrite(
      unsetTableProperty = true,
      deltaLog,
      originalFiles,
      startingVersion,
      originalData = originalData,
      droppedFeature = true)
    // Verify the schema comment is preserved after the rewrite.
    assert(deltaLog.update().schema.head.getComment().contains(comment),
      "Should preserve the schema comment.")
    verifyDropFeatureTruncateHistory()
  }

  /**
   * Completes the feature drop with `DROP FEATURE ... TRUNCATE HISTORY` and asserts that the
   * resulting protocol has no reader/writer features and is at reader and writer version 1.
   *
   * Both retention properties are set to '0 hours' and the manual [[clock]] is advanced by five
   * minutes before the command is issued.
   */
  protected def verifyDropFeatureTruncateHistory(): Unit = {
    val deltaLogWithClock =
      DeltaLog.forTable(spark, TableIdentifier(tableName = testTableName), clock)
    // Populate the delta cache with the delta log with the right data path so it stores the clock.
    // This is currently the only way to make sure the drop feature command uses the clock.
    DeltaLog.clearCache()
    DeltaLog.forTable(spark, deltaLogWithClock.dataPath, clock)
    // Set the log retention to 0 so that we can test truncate history.
    sql(
      s"""
         |ALTER TABLE $testTableName SET TBLPROPERTIES (
         |  '${TABLE_FEATURE_DROP_TRUNCATE_HISTORY_LOG_RETENTION.key}' = '0 hours',
         |  '${LOG_RETENTION.key}' = '0 hours')
         |""".stripMargin)
    // Pretend enough time has passed for the history to be truncated.
    clock.advance(TimeUnit.MINUTES.toMillis(5))
    sql(
      s"""
         |ALTER TABLE $testTableName DROP FEATURE ${ColumnMappingTableFeature.name} TRUNCATE HISTORY
         |""".stripMargin)
    // NOTE(review): `deltaLog` is not defined in this class; presumably it is inherited from
    // RemoveColumnMappingSuiteUtils -- confirm it resolves to the log of `testTableName`.
    val newSnapshot = deltaLog.update()
    assert(newSnapshot.protocol.readerAndWriterFeatures.isEmpty, "Should drop the feature.")
    assert(newSnapshot.protocol.minWriterVersion == 1)
    assert(newSnapshot.protocol.minReaderVersion == 1)
  }

  /** Issues `ALTER TABLE ... DROP FEATURE` for the column mapping feature on the test table. */
  protected def dropColumnMappingTableFeature(): Unit = {
    sql(
      s"""
         |ALTER TABLE $testTableName DROP FEATURE ${ColumnMappingTableFeature.name}
         |""".stripMargin)
  }
}
0 commit comments