@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
23
23
import org .apache .spark .sql .catalyst .expressions ._
24
24
import org .apache .spark .sql .catalyst .plans .logical ._
25
25
import org .apache .spark .sql .catalyst .rules ._
26
- import org .apache .spark .sql .types .{ ArrayType , StructField , StructType , IntegerType }
26
+ import org .apache .spark .sql .types ._
27
27
28
28
/**
29
29
* A trivial [[Analyzer ]] with an [[EmptyCatalog ]] and [[EmptyFunctionRegistry ]]. Used for testing
@@ -66,32 +66,82 @@ class Analyzer(catalog: Catalog,
66
66
typeCoercionRules ++
67
67
extendedRules : _* ),
68
68
Batch (" Check Analysis" , Once ,
69
- CheckResolution ::
70
- CheckAggregation ::
71
- Nil : _* ),
72
- Batch (" AnalysisOperators" , fixedPoint,
73
- EliminateAnalysisOperators )
69
+ CheckResolution ),
70
+ Batch (" Remove SubQueries" , fixedPoint,
71
+ EliminateSubQueries )
74
72
)
75
73
76
74
/**
77
75
* Makes sure all attributes and logical plans have been resolved.
78
76
*/
79
77
object CheckResolution extends Rule [LogicalPlan ] {
78
+ def failAnalysis (msg : String ) = { throw new AnalysisException (msg) }
79
+
80
80
def apply (plan : LogicalPlan ): LogicalPlan = {
81
- plan.transformUp {
82
- case p if p.expressions.exists(! _.resolved) =>
83
- val missing = p.expressions.filterNot(_.resolved).map(_.prettyString).mkString(" ," )
84
- val from = p.inputSet.map(_.name).mkString(" {" , " , " , " }" )
85
-
86
- throw new AnalysisException (s " Cannot resolve ' $missing' given input columns $from" )
87
- case p if ! p.resolved && p.childrenResolved =>
88
- throw new AnalysisException (s " Unresolved operator in the query plan ${p.simpleString}" )
89
- } match {
90
- // As a backstop, use the root node to check that the entire plan tree is resolved.
91
- case p if ! p.resolved =>
92
- throw new AnalysisException (s " Unresolved operator in the query plan ${p.simpleString}" )
93
- case p => p
81
+ // We transform up and order the rules so as to catch the first possible failure instead
82
+ // of the result of cascading resolution failures.
83
+ plan.foreachUp {
84
+ case operator : LogicalPlan =>
85
+ operator transformExpressionsUp {
86
+ case a : Attribute if ! a.resolved =>
87
+ val from = operator.inputSet.map(_.name).mkString(" , " )
88
+ failAnalysis(s " cannot resolve ' ${a.prettyString}' given input columns $from" )
89
+
90
+ case c : Cast if ! c.resolved =>
91
+ failAnalysis(
92
+ s " invalid cast from ${c.child.dataType.simpleString} to ${c.dataType.simpleString}" )
93
+
94
+ case b : BinaryExpression if ! b.resolved =>
95
+ failAnalysis(
96
+ s " invalid expression ${b.prettyString} " +
97
+ s " between ${b.left.simpleString} and ${b.right.simpleString}" )
98
+ }
99
+
100
+ operator match {
101
+ case f : Filter if f.condition.dataType != BooleanType =>
102
+ failAnalysis(
103
+ s " filter expression ' ${f.condition.prettyString}' " +
104
+ s " of type ${f.condition.dataType.simpleString} is not a boolean. " )
105
+
106
+ case aggregatePlan @ Aggregate (groupingExprs, aggregateExprs, child) =>
107
+ def checkValidAggregateExpression (expr : Expression ): Unit = expr match {
108
+ case _ : AggregateExpression => // OK
109
+ case e : Attribute if ! groupingExprs.contains(e) =>
110
+ failAnalysis(
111
+ s " expression ' ${e.prettyString}' is neither present in the group by, " +
112
+ s " nor is it an aggregate function. " +
113
+ " Add to group by or wrap in first() if you don't care which value you get." )
114
+ case e if groupingExprs.contains(e) => // OK
115
+ case e if e.references.isEmpty => // OK
116
+ case e => e.children.foreach(checkValidAggregateExpression)
117
+ }
118
+
119
+ val cleaned = aggregateExprs.map(_.transform {
120
+ // Should trim aliases around `GetField`s. These aliases are introduced while
121
+ // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
122
+ // (Should we just turn `GetField` into a `NamedExpression`?)
123
+ case Alias (g, _) => g
124
+ })
125
+
126
+ cleaned.foreach(checkValidAggregateExpression)
127
+
128
+ case o if o.children.nonEmpty &&
129
+ ! o.references.filter(_.name != " grouping__id" ).subsetOf(o.inputSet) =>
130
+ val missingAttributes = (o.references -- o.inputSet).map(_.prettyString).mkString(" ," )
131
+ val input = o.inputSet.map(_.prettyString).mkString(" ," )
132
+
133
+ failAnalysis(s " resolved attributes $missingAttributes missing from $input" )
134
+
135
+ // Catch all
136
+ case o if ! o.resolved =>
137
+ failAnalysis(
138
+ s " unresolved operator ${operator.simpleString}" )
139
+
140
+ case _ => // Analysis successful!
141
+ }
94
142
}
143
+
144
+ plan
95
145
}
96
146
}
97
147
@@ -192,45 +242,14 @@ class Analyzer(catalog: Catalog,
192
242
}
193
243
}
194
244
195
- /**
196
- * Checks for non-aggregated attributes with aggregation
197
- */
198
- object CheckAggregation extends Rule [LogicalPlan ] {
199
- def apply (plan : LogicalPlan ): LogicalPlan = {
200
- plan.transform {
201
- case aggregatePlan @ Aggregate (groupingExprs, aggregateExprs, child) =>
202
- def isValidAggregateExpression (expr : Expression ): Boolean = expr match {
203
- case _ : AggregateExpression => true
204
- case e : Attribute => groupingExprs.contains(e)
205
- case e if groupingExprs.contains(e) => true
206
- case e if e.references.isEmpty => true
207
- case e => e.children.forall(isValidAggregateExpression)
208
- }
209
-
210
- aggregateExprs.find { e =>
211
- ! isValidAggregateExpression(e.transform {
212
- // Should trim aliases around `GetField`s. These aliases are introduced while
213
- // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
214
- // (Should we just turn `GetField` into a `NamedExpression`?)
215
- case Alias (g : GetField , _) => g
216
- })
217
- }.foreach { e =>
218
- throw new TreeNodeException (plan, s " Expression not in GROUP BY: $e" )
219
- }
220
-
221
- aggregatePlan
222
- }
223
- }
224
- }
225
-
226
245
/**
227
246
* Replaces [[UnresolvedRelation ]]s with concrete relations from the catalog.
228
247
*/
229
248
object ResolveRelations extends Rule [LogicalPlan ] {
230
249
def apply (plan : LogicalPlan ): LogicalPlan = plan transform {
231
250
case i @ InsertIntoTable (UnresolvedRelation (tableIdentifier, alias), _, _, _) =>
232
251
i.copy(
233
- table = EliminateAnalysisOperators (catalog.lookupRelation(tableIdentifier, alias)))
252
+ table = EliminateSubQueries (catalog.lookupRelation(tableIdentifier, alias)))
234
253
case UnresolvedRelation (tableIdentifier, alias) =>
235
254
catalog.lookupRelation(tableIdentifier, alias)
236
255
}
@@ -477,7 +496,7 @@ class Analyzer(catalog: Catalog,
477
496
* only required to provide scoping information for attributes and can be removed once analysis is
478
497
* complete.
479
498
*/
480
- object EliminateAnalysisOperators extends Rule [LogicalPlan ] {
499
+ object EliminateSubQueries extends Rule [LogicalPlan ] {
481
500
def apply (plan : LogicalPlan ): LogicalPlan = plan transform {
482
501
case Subquery (_, child) => child
483
502
}
0 commit comments