@@ -280,7 +280,10 @@ object RuleUtils {
          Map(IndexConstants.INDEX_RELATION_IDENTIFIER))(spark, index)

        val flatSchema =
-         ResolverUtils.resolve(spark, index.indexedColumns ++ index.includedColumns, relation.plan)
+         ResolverUtils.resolve(
+           spark,
+           index.indexedColumns ++ index.includedColumns,
+           relation.plan)

        // SchemaUtils.escapeFieldNames(SchemaUtils.flatten(relation.plan.schema))
        val updatedOutput =
          if (flatSchema.isDefined && SchemaUtils.containsNestedFieldNames(flatSchema.get)) {
@@ -294,8 +297,8 @@ object RuleUtils {
            }
          } else {
            relation.plan.output
-             .filter(attr => indexFsRelation.schema.fieldNames.contains(attr.name))
-             .map(_.asInstanceOf[AttributeReference])
+             .filter(attr => indexFsRelation.schema.fieldNames.contains(attr.name))
+             .map(_.asInstanceOf[AttributeReference])
          }
          relation.createLogicalRelation(indexFsRelation, updatedOutput)

@@ -328,7 +331,7 @@ object RuleUtils {
      useBucketSpec: Boolean,
      useBucketUnionForAppended: Boolean): LogicalPlan = {
    val provider = Hyperspace.getContext(spark).sourceProviderManager
-   var unhandledAppendedFiles: Seq[Path] = Nil
+   var unhandledAppendedFiles = Seq.empty[Path]
    // Get transformed plan with index data and appended files if applicable.
    val indexPlan = plan transformUp {
      // Use transformUp here as currently one relation is allowed (pre-requisite).
@@ -367,7 +370,7 @@ object RuleUtils {

        val filesToRead = {
          if (useBucketSpec || !index.hasParquetAsSourceFormat || filesDeleted.nonEmpty ||
-             relation.partitionSchema.nonEmpty) {
+             relation.partitionSchema.nonEmpty || index.usesNestedFields) {
            // Since the index data is in "parquet" format, we cannot read source files
            // in formats other than "parquet" using one FileScan node as the operator requires
            // files in one homogenous format. To address this, we need to read the appended
@@ -391,10 +394,17 @@ object RuleUtils {
        // In order to handle deleted files, read index data with the lineage column so that
        // we could inject Filter-Not-In conditions on the lineage column to exclude the indexed
        // rows from the deleted files.
+       val flatSchema = ResolverUtils.resolve(
+         spark,
+         SchemaUtils.removePrefixNestedFieldNames(index.indexedColumns ++ index.includedColumns),
+         relation.plan)
        val newSchema = StructType(
-         index.schema.filter(s =>
-           relation.plan.schema.contains(s) || (filesDeleted.nonEmpty && s.name.equals(
-             IndexConstants.DATA_FILE_NAME_ID))))
+         index.schema.filter(
+           s =>
+             (flatSchema.isDefined && SchemaUtils
+               .prefixNestedFieldNames(flatSchema.get)
+               .contains(s.name)) ||
+               (filesDeleted.nonEmpty && s.name.equals(IndexConstants.DATA_FILE_NAME_ID))))

        def fileIndex: InMemoryFileIndex = {
          new InMemoryFileIndex(spark, filesToRead, Map(), None)
@@ -414,9 +424,22 @@ object RuleUtils {
          new ParquetFileFormat,
          Map(IndexConstants.INDEX_RELATION_IDENTIFIER))(spark, index)

-       val updatedOutput = relation.plan.output
-         .filter(attr => indexFsRelation.schema.fieldNames.contains(attr.name))
-         .map(_.asInstanceOf[AttributeReference])
+       val updatedOutput =
+         if (flatSchema.isDefined && SchemaUtils.containsNestedFieldNames(
+             SchemaUtils.prefixNestedFieldNames(flatSchema.get))) {
+           indexFsRelation.schema.flatMap { s =>
+             val exprId = getFieldPosition(index, s.name)
+             relation.plan.output.find(a => s.name.contains(a.name)).map { a =>
+               AttributeReference(s.name, s.dataType, a.nullable, a.metadata)(
+                 ExprId(exprId),
+                 a.qualifier)
+             }
+           }
+         } else {
+           relation.plan.output
+             .filter(attr => indexFsRelation.schema.fieldNames.contains(attr.name))
+             .map(_.asInstanceOf[AttributeReference])
+         }

        if (filesDeleted.isEmpty) {
          relation.createLogicalRelation(indexFsRelation, updatedOutput)
@@ -428,6 +451,12 @@ object RuleUtils {
          val filterForDeleted = Filter(Not(In(lineageAttr, deletedFileIds)), rel)
          Project(updatedOutput, OptimizeIn(filterForDeleted))
        }
+     case p: Project if provider.isSupportedProject(p) =>
+       transformProject(p, index)
+
+     case f: Filter if provider.isSupportedFilter(f) =>
+       transformFilter(f, index)
+
    }

    if (unhandledAppendedFiles.nonEmpty) {
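
The deleted-file handling above is essentially a Filter-Not-In on the lineage column: index rows whose originating source file has since been deleted are dropped before the final Project. Below is a minimal DataFrame-level sketch of that idea, assuming an illustrative lineage column named _data_file_id and made-up file ids; Hyperspace itself builds the equivalent Catalyst Filter(Not(In(...))) node shown in the hunk above.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

// Sketch only: shows the lineage-based exclusion idea, not Hyperspace's actual plan rewrite.
object LineageFilterSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("lineage-sketch").getOrCreate()
    import spark.implicits._

    // Index rows carry a lineage column recording which source file each row came from.
    val indexRows = Seq(
      ("alice", 1L), // indexed from file id 1
      ("bob", 2L),   // indexed from file id 2 (this file was later deleted)
      ("carol", 3L)  // indexed from file id 3
    ).toDF("name", "_data_file_id")

    // Illustrative ids of source files deleted after the index was built.
    val deletedFileIds = Seq(2L)

    // Filter-Not-In on the lineage column: keep only rows whose source file still exists.
    val validRows = indexRows.filter(!col("_data_file_id").isin(deletedFileIds: _*))
    validRows.show() // alice and carol remain; bob's row is excluded

    spark.stop()
  }
}
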
@@ -501,11 +530,14 @@ object RuleUtils {
    // Set the same output schema with the index plan to merge them using BucketUnion.
    // Include partition columns for data loading.
    val partitionColumns = relation.partitionSchema.map(_.name)
-   val updatedSchema = StructType(relation.plan.schema.filter(col =>
-     index.schema.contains(col) || relation.partitionSchema.contains(col)))
+   val updatedSchema = StructType(
+     relation.plan.schema.filter(col =>
+       index.schema.fieldNames.exists(n => n.contains(col.name)) ||
+         relation.partitionSchema.contains(col)))
    val updatedOutput = relation.plan.output
      .filter(attr =>
-       index.schema.fieldNames.contains(attr.name) || partitionColumns.contains(attr.name))
+       index.schema.fieldNames.exists(n => n.contains(attr.name)) ||
+         partitionColumns.contains(attr.name))
      .map(_.asInstanceOf[AttributeReference])
    val newRelation = relation.createHadoopFsRelation(
      newLocation,
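
Much of this change deals with mapping nested source fields (for example a struct leaf addressed as nested.leaf.cnt) onto the flat, prefixed column names stored in the index schema, which is what the prefixNestedFieldNames / removePrefixNestedFieldNames calls above are for. The following self-contained sketch illustrates that flatten-and-prefix round trip; the __hs_nested. prefix and the helper names are assumptions for illustration, not Hyperspace's actual SchemaUtils implementation.

// Sketch only: illustrative stand-ins for the prefix/unprefix helpers referenced above.
object NestedFieldNameSketch {
  // Assumed marker for flattened nested field names (illustrative, not a Hyperspace constant).
  val NestedPrefix = "__hs_nested."

  // Prefix a dotted field name so it can be stored as a single flat column in the index schema.
  def prefixNested(fieldName: String): String =
    if (fieldName.contains(".")) NestedPrefix + fieldName else fieldName

  // Strip the marker again when resolving the name against the original source plan.
  def removePrefixNested(fieldName: String): String =
    fieldName.stripPrefix(NestedPrefix)

  def main(args: Array[String]): Unit = {
    val indexedColumns = Seq("id", "nested.leaf.cnt")
    val flat = indexedColumns.map(prefixNested)
    println(flat)                         // List(id, __hs_nested.nested.leaf.cnt)
    println(flat.map(removePrefixNested)) // List(id, nested.leaf.cnt)
  }
}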