Skip to content

Commit

Permalink
[SPARK-45012][SQL] CheckAnalysis should throw inlined plan in Analysi…
Browse files Browse the repository at this point in the history
…sException

### What changes were proposed in this pull request?

CheckAnalysis should throw inlined plan in AnalysisException

### Why are the changes needed?

Before this change, the plan attached to AnalysisException is analyzed plan but not inlined. However, `CheckAnalysis` inlines and checks the plan, so if an exception is from `CheckAnalysis`, it should attach the inlined version of plan which is more useful to debug than the original analyzed plan, especially when there is CTE in inside the plan.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing UT

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#42729 from amaliujia/improve_analyzer.

Authored-by: Rui Wang <rui.wang@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
amaliujia authored and cloud-fan committed Aug 31, 2023
1 parent 00fb185 commit a45a3a3
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
if (plan.analyzed) return plan
AnalysisHelper.markInAnalyzer {
val analyzed = executeAndTrack(plan, tracker)
try {
checkAnalysis(analyzed)
analyzed
} catch {
case e: AnalysisException =>
throw new ExtendedAnalysisException(e, analyzed)
}
checkAnalysis(analyzed)
analyzed
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import scala.collection.mutable

import org.apache.spark.SparkException
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.ExtendedAnalysisException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.SubExprUtils._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Median, PercentileCont, PercentileDisc}
Expand Down Expand Up @@ -154,10 +155,22 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
cteMap.values.foreach { case (relation, refCount, _) =>
// If a CTE relation is never used, it will disappear after inline. Here we explicitly check
// analysis for it, to make sure the entire query plan is valid.
if (refCount == 0) checkAnalysis0(relation.child)
try {
if (refCount == 0) checkAnalysis0(relation.child)
} catch {
case e: AnalysisException =>
throw new ExtendedAnalysisException(e, relation.child)
}

}
// Inline all CTEs in the plan to help check query plan structures in subqueries.
checkAnalysis0(inlineCTE(plan))
val inlinedPlan = inlineCTE(plan)
try {
checkAnalysis0(inlinedPlan)
} catch {
case e: AnalysisException =>
throw new ExtendedAnalysisException(e, inlinedPlan)
}
plan.setAnalyzed()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
CREATE VIEW key_dependent_view AS
SELECT * FROM view_base_table GROUP BY key
-- !query analysis
org.apache.spark.sql.AnalysisException
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "MISSING_AGGREGATION",
"sqlState" : "42803",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ CREATE VIEW key_dependent_view AS
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "MISSING_AGGREGATION",
"sqlState" : "42803",
Expand Down

0 comments on commit a45a3a3

Please sign in to comment.