Skip to content

Commit fe286df

Browse files
committed
Address comments.
1 parent 7908788 commit fe286df

File tree

2 files changed

+9
-10
lines changed

2 files changed

+9
-10
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -264,15 +264,13 @@ object GeneratorNestedColumnAliasing {
264264
val exprsToPrune = projectList ++ g.generator.children
265265
NestedColumnAliasing.getAliasSubMap(exprsToPrune).map {
266266
case (nestedFieldToAlias, attrToAliases) =>
267-
// Defer updating `Generate.unrequiredChildIndex` to next round of `ColumnPruning`.
268-
269267
val (nestedFieldsOnGenerator, nestedFieldsNotOnGenerator) =
270268
nestedFieldOnGeneratorOutput(nestedFieldToAlias, g.qualifiedGeneratorOutput)
271269
val (attrToAliasesOnGenerator, attrToAliasesNotOnGenerator) =
272270
aliasesOnGeneratorOutput(attrToAliases, g.qualifiedGeneratorOutput)
273271

274-
// Push nested column accessors through `Generator`. We cannot prune on `Generator`'s
275-
// output.
272+
// Push nested column accessors through `Generator`.
273+
// Defer updating `Generate.unrequiredChildIndex` to next round of `ColumnPruning`.
276274
val newChild = NestedColumnAliasing.replaceWithAliases(g,
277275
nestedFieldsNotOnGenerator, attrToAliasesNotOnGenerator)
278276
val pushedThrough = Project(NestedColumnAliasing
@@ -282,15 +280,18 @@ object GeneratorNestedColumnAliasing {
282280
// For multiple field case, we cannot directly move field extractor into
283281
// the generator expression. A workaround is to re-construct array of struct
284282
// from multiple fields. But it will be more complicated and may not worth.
285-
if (nestedFieldsOnGenerator.size == 1) {
283+
// TODO(SPARK-34956): support multiple fields.
284+
if (nestedFieldsOnGenerator.size > 1 || nestedFieldsOnGenerator.size == 0) {
285+
pushedThrough
286+
} else {
286287
// Only one nested column accessor.
287288
// E.g., df.select(explode($"items").as("item")).select($"item.a")
288289
pushedThrough match {
289290
case p @ Project(_, newG: Generate) =>
290291
// Replace the child expression of `ExplodeBase` generator with
291292
// nested column accessor.
292-
// E.g., df.select(explode($"items").as("item")) =>
293-
// df.select(explode($"items.a").as("item"))
293+
// E.g., df.select(explode($"items").as("item")).select($"item.a") =>
294+
// df.select(explode($"items.a").as("item.a"))
294295
val rewrittenG = newG.transformExpressions {
295296
case e: ExplodeBase =>
296297
val extractor = nestedFieldsOnGenerator.head._1.transformUp {
@@ -322,8 +323,6 @@ object GeneratorNestedColumnAliasing {
322323

323324
case _ => pushedThrough
324325
}
325-
} else {
326-
pushedThrough
327326
}
328327
}
329328

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ class NestedColumnAliasingSuite extends SchemaPruningTest {
329329
comparePlans(optimized, expected)
330330
}
331331

332-
test("Nested field pruning for Project and Generate: not prune on generator output") {
332+
test("Nested field pruning for Project and Generate: multiple-field case is not supported") {
333333
val companies = LocalRelation(
334334
'id.int,
335335
'employers.array(employer))

0 commit comments

Comments
 (0)