From a45a3a3d60cb97b107a177ad16bfe36372bc3e9b Mon Sep 17 00:00:00 2001
From: Rui Wang
Date: Thu, 31 Aug 2023 10:51:44 +0800
Subject: [PATCH] [SPARK-45012][SQL] CheckAnalysis should throw inlined plan in AnalysisException

### What changes were proposed in this pull request?

`CheckAnalysis` should attach the inlined plan to the `AnalysisException` it throws.

### Why are the changes needed?

Before this change, the plan attached to an `AnalysisException` was the analyzed plan without CTEs inlined. However, `CheckAnalysis` inlines CTEs and then checks the plan, so an exception raised from `CheckAnalysis` should carry the inlined version of the plan, which is more useful for debugging than the original analyzed plan, especially when the plan contains CTEs.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing UT

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #42729 from amaliujia/improve_analyzer.

Authored-by: Rui Wang
Signed-off-by: Wenchen Fan
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     |  9 ++-------
 .../sql/catalyst/analysis/CheckAnalysis.scala      | 17 +++++++++++++++--
 .../postgreSQL/create_view.sql.out                 |  2 +-
 .../results/postgreSQL/create_view.sql.out         |  2 +-
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index cf819c7346c6b..9a6d9c8b735be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -209,13 +209,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
     if (plan.analyzed) return plan
     AnalysisHelper.markInAnalyzer {
       val analyzed = executeAndTrack(plan, tracker)
-      try {
-        checkAnalysis(analyzed)
-        analyzed
-      } catch {
-        case e: AnalysisException =>
-          throw new ExtendedAnalysisException(e, analyzed)
-      }
+      checkAnalysis(analyzed)
+      analyzed
     }
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 13999f391d9c2..038cd7d944af9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -20,6 +20,7 @@ import scala.collection.mutable
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.ExtendedAnalysisException
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Median, PercentileCont, PercentileDisc}
@@ -154,10 +155,22 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
     cteMap.values.foreach { case (relation, refCount, _) =>
       // If a CTE relation is never used, it will disappear after inline. Here we explicitly check
       // analysis for it, to make sure the entire query plan is valid.
-      if (refCount == 0) checkAnalysis0(relation.child)
+      try {
+        if (refCount == 0) checkAnalysis0(relation.child)
+      } catch {
+        case e: AnalysisException =>
+          throw new ExtendedAnalysisException(e, relation.child)
+      }
+
     }
     // Inline all CTEs in the plan to help check query plan structures in subqueries.
-    checkAnalysis0(inlineCTE(plan))
+    val inlinedPlan = inlineCTE(plan)
+    try {
+      checkAnalysis0(inlinedPlan)
+    } catch {
+      case e: AnalysisException =>
+        throw new ExtendedAnalysisException(e, inlinedPlan)
+    }
     plan.setAnalyzed()
   }
 
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
index b199cb55f2a44..35c20597e2b6a 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out
@@ -52,7 +52,7 @@ org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
 CREATE VIEW key_dependent_view AS
    SELECT * FROM view_base_table GROUP BY key
 -- !query analysis
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
 {
   "errorClass" : "MISSING_AGGREGATION",
   "sqlState" : "42803",
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
index bcd14c72a831b..2a83cc19b6cf2 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out
@@ -52,7 +52,7 @@ CREATE VIEW key_dependent_view AS
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.sql.AnalysisException
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
 {
   "errorClass" : "MISSING_AGGREGATION",
   "sqlState" : "42803",
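
For readers who want the shape of the change without the surrounding Spark code, the following is a minimal, self-contained Scala sketch of the pattern the patch applies: run the analysis checks against the inlined plan and, if a check fails, rethrow the error with that inlined plan attached. The `Plan`, `AnalysisError`, `ExtendedAnalysisError`, `inlineCte`, and `checkPlan` names are hypothetical stand-ins, not Spark's actual classes or APIs.

```scala
// Hypothetical stand-ins for Spark's LogicalPlan, AnalysisException and
// ExtendedAnalysisException; they exist only to make the sketch self-contained.
final case class Plan(description: String)

class AnalysisError(message: String) extends Exception(message)

class ExtendedAnalysisError(cause: AnalysisError, val plan: Plan)
  extends Exception(s"${cause.getMessage}\nInlined plan:\n${plan.description}", cause)

object CheckAnalysisSketch {
  // Pretend CTE inlining: returns the plan that the checks actually run against.
  def inlineCte(plan: Plan): Plan = Plan(plan.description + " [CTEs inlined]")

  // Pretend analysis check that always fails, to exercise the exception path.
  def checkPlan(plan: Plan): Unit = throw new AnalysisError("MISSING_AGGREGATION")

  def checkAnalysis(plan: Plan): Unit = {
    val inlinedPlan = inlineCte(plan)
    try {
      checkPlan(inlinedPlan)
    } catch {
      // Rethrow with the inlined plan attached, mirroring the change in this patch.
      case e: AnalysisError => throw new ExtendedAnalysisError(e, inlinedPlan)
    }
  }

  def main(args: Array[String]): Unit = {
    try checkAnalysis(Plan("Aggregate over a WithCTE node"))
    catch { case e: ExtendedAnalysisError => println(e.getMessage) }
  }
}
```

Running `main` prints the error message together with the inlined plan, which matches the intent described above and is why the exception class recorded in the golden files changes to `ExtendedAnalysisException`.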