|
21 | 21 | import com.google.common.io.Resources;
|
22 | 22 | import io.airlift.json.ObjectMapperProvider;
|
23 | 23 | import io.trino.filesystem.TrinoFileSystem;
|
| 24 | +import io.trino.filesystem.TrinoInputFile; |
24 | 25 | import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
|
| 26 | +import io.trino.filesystem.local.LocalInputFile; |
| 27 | +import io.trino.parquet.ParquetReaderOptions; |
| 28 | +import io.trino.parquet.reader.MetadataReader; |
| 29 | +import io.trino.plugin.deltalake.transactionlog.AddFileEntry; |
25 | 30 | import io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry;
|
26 | 31 | import io.trino.plugin.deltalake.transactionlog.MetadataEntry;
|
| 32 | +import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics; |
| 33 | +import io.trino.plugin.hive.FileFormatDataSourceStats; |
| 34 | +import io.trino.plugin.hive.parquet.TrinoParquetDataSource; |
27 | 35 | import io.trino.testing.AbstractTestQueryFramework;
|
28 | 36 | import io.trino.testing.QueryRunner;
|
| 37 | +import org.apache.parquet.hadoop.metadata.FileMetaData; |
| 38 | +import org.apache.parquet.hadoop.metadata.ParquetMetadata; |
| 39 | +import org.apache.parquet.schema.PrimitiveType; |
29 | 40 | import org.assertj.core.api.Assertions;
|
30 | 41 | import org.testng.annotations.BeforeClass;
|
31 | 42 | import org.testng.annotations.DataProvider;
|
|
38 | 49 | import java.nio.file.Files;
|
39 | 50 | import java.nio.file.Path;
|
40 | 51 | import java.util.List;
|
| 52 | +import java.util.Optional; |
41 | 53 | import java.util.regex.Pattern;
|
42 | 54 | import java.util.stream.Stream;
|
43 | 55 |
|
@@ -69,11 +81,15 @@ public class TestDeltaLakeBasic
|
69 | 81 | // The col-{uuid} pattern for delta.columnMapping.physicalName
|
70 | 82 | private static final Pattern PHYSICAL_COLUMN_NAME_PATTERN = Pattern.compile("^col-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$");
|
71 | 83 |
|
| 84 | + private static final TrinoFileSystem FILE_SYSTEM = new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS).create(SESSION); |
| 85 | + |
72 | 86 | @Override
|
73 | 87 | protected QueryRunner createQueryRunner()
|
74 | 88 | throws Exception
|
75 | 89 | {
|
76 |
| - return createDeltaLakeQueryRunner(DELTA_CATALOG, ImmutableMap.of(), ImmutableMap.of("delta.register-table-procedure.enabled", "true")); |
| 90 | + return createDeltaLakeQueryRunner(DELTA_CATALOG, ImmutableMap.of(), ImmutableMap.of( |
| 91 | + "delta.register-table-procedure.enabled", "true", |
| 92 | + "delta.enable-non-concurrent-writes", "true")); |
77 | 93 | }
|
78 | 94 |
|
79 | 95 | @BeforeClass
|
@@ -208,6 +224,66 @@ public void testAddNestedColumnWithColumnMappingMode(String columnMappingMode)
|
208 | 224 | .containsPattern("(delta\\.columnMapping\\.physicalName.*?){11}");
|
209 | 225 | }
|
210 | 226 |
|
| 227 | + /** |
| 228 | + * @see deltalake.column_mapping_mode_id |
| 229 | + * @see deltalake.column_mapping_mode_name |
| 230 | + */ |
| 231 | + @Test(dataProvider = "columnMappingModeDataProvider") |
| 232 | + public void testOptimizeWithColumnMappingMode(String columnMappingMode) |
| 233 | + throws Exception |
| 234 | + { |
| 235 | + // The table contains 'x' column with column mapping mode |
| 236 | + String tableName = "test_optimize_" + randomNameSuffix(); |
| 237 | + Path tableLocation = Files.createTempFile(tableName, null); |
| 238 | + copyDirectoryContents(new File(Resources.getResource("deltalake/column_mapping_mode_" + columnMappingMode).toURI()).toPath(), tableLocation); |
| 239 | + |
| 240 | + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); |
| 241 | + assertThat(query("DESCRIBE " + tableName)).projected("Column", "Type").skippingTypesCheck().matches("VALUES ('x', 'integer')"); |
| 242 | + assertQueryReturnsEmptyResult("SELECT * FROM " + tableName); |
| 243 | + |
| 244 | + MetadataEntry originalMetadata = loadMetadataEntry(0, tableLocation); |
| 245 | + JsonNode schema = OBJECT_MAPPER.readTree(originalMetadata.getSchemaString()); |
| 246 | + List<JsonNode> fields = ImmutableList.copyOf(schema.get("fields").elements()); |
| 247 | + Assertions.assertThat(fields).hasSize(1); |
| 248 | + JsonNode column = fields.get(0); |
| 249 | + String physicalName = column.get("metadata").get("delta.columnMapping.physicalName").asText(); |
| 250 | + int id = column.get("metadata").get("delta.columnMapping.id").asInt(); |
| 251 | + |
| 252 | + assertUpdate("INSERT INTO " + tableName + " VALUES 10", 1); |
| 253 | + assertUpdate("INSERT INTO " + tableName + " VALUES 20", 1); |
| 254 | + assertUpdate("INSERT INTO " + tableName + " VALUES NULL", 1); |
| 255 | + assertUpdate("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); |
| 256 | + |
| 257 | + // Verify 'add' entry contains the expected physical name in the stats |
| 258 | + List<DeltaLakeTransactionLogEntry> transactionLog = getEntriesFromJson(4, tableLocation.resolve("_delta_log").toString(), FILE_SYSTEM).orElseThrow(); |
| 259 | + assertThat(transactionLog).hasSize(5); |
| 260 | + assertThat(transactionLog.get(0).getCommitInfo()).isNotNull(); |
| 261 | + assertThat(transactionLog.get(1).getRemove()).isNotNull(); |
| 262 | + assertThat(transactionLog.get(2).getRemove()).isNotNull(); |
| 263 | + assertThat(transactionLog.get(3).getRemove()).isNotNull(); |
| 264 | + assertThat(transactionLog.get(4).getAdd()).isNotNull(); |
| 265 | + AddFileEntry addFileEntry = transactionLog.get(4).getAdd(); |
| 266 | + DeltaLakeFileStatistics stats = addFileEntry.getStats().orElseThrow(); |
| 267 | + assertThat(stats.getMinValues().orElseThrow().get(physicalName)).isEqualTo(10); |
| 268 | + assertThat(stats.getMaxValues().orElseThrow().get(physicalName)).isEqualTo(20); |
| 269 | + assertThat(stats.getNullCount(physicalName).orElseThrow()).isEqualTo(1); |
| 270 | + |
| 271 | + // Verify optimized parquet file contains the expected physical id and name |
| 272 | + TrinoInputFile inputFile = new LocalInputFile(tableLocation.resolve(addFileEntry.getPath()).toFile()); |
| 273 | + ParquetMetadata parquetMetadata = MetadataReader.readFooter( |
| 274 | + new TrinoParquetDataSource(inputFile, new ParquetReaderOptions(), new FileFormatDataSourceStats()), |
| 275 | + Optional.empty()); |
| 276 | + FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); |
| 277 | + PrimitiveType physicalType = getOnlyElement(fileMetaData.getSchema().getColumns().iterator()).getPrimitiveType(); |
| 278 | + assertThat(physicalType.getName()).isEqualTo(physicalName); |
| 279 | + if (columnMappingMode.equals("id")) { |
| 280 | + assertThat(physicalType.getId().intValue()).isEqualTo(id); |
| 281 | + } |
| 282 | + else { |
| 283 | + assertThat(physicalType.getId()).isNull(); |
| 284 | + } |
| 285 | + } |
| 286 | + |
211 | 287 | @DataProvider
|
212 | 288 | public Object[][] columnMappingModeDataProvider()
|
213 | 289 | {
|
|
0 commit comments