/**
 * Returns the result of this query as an RDD[InternalRow] by delegating to `doExecute`.
 * All work is grouped under a single RDD operation scope named after this node so the
 * whole physical operator shows up as one unit in the Spark UI.
 *
 * Concrete physical operators should override `doExecute`, not this method.
 */
final def execute(): RDD[InternalRow] = {
  // withScope(sc, name, allowNesting = false, ignoreParent = true): every RDD created
  // inside the body is attributed to this plan node's scope in the UI.
  RDDOperationScope.withScope(sparkContext, nodeName, false, true) {
    // prepare() kicks off subquery Futures (among other things); waitForSubqueries()
    // must block until they finish BEFORE doExecute() reads their results.
    prepare()
    waitForSubqueries()
    doExecute()
  }
}
// All the subqueries and the Future of their results.
// NOTE(review): transient because SparkPlan is Serializable — Futures must not be
// shipped to executors; the buffer is driver-side only.
@transient private val queryResults = ArrayBuffer[(ScalarSubquery, Future[Array[InternalRow]])]()

/**
 * Collects all the scalar subqueries in this plan's expressions and creates a Future
 * for each that takes the first two rows of its result. The Futures start running
 * immediately on `subqueryExecutionContext`, in the background; `waitForSubqueries()`
 * later blocks on them and fills the results in.
 */
protected def prepareSubqueries(): Unit = {
  val allSubqueries = expressions.flatMap(_.collect {case e: ScalarSubquery => e})
  allSubqueries.foreach { e =>
    val futureResult = Future {
      // We only need the first row; take two rows so we can throw an exception if
      // more than one row is returned (a scalar subquery must yield at most one row).
      e.executedPlan.executeTake(2)
    }(SparkPlan.subqueryExecutionContext)
    queryResults += e -> futureResult
  }
}
/**
 * Blocks until every subquery Future registered by `prepareSubqueries()` has finished,
 * then pushes each result into its ScalarSubquery expression:
 *  - more than one row  -> error (a scalar subquery must produce at most one row)
 *  - exactly one row    -> the single (one-column) value
 *  - zero rows          -> null
 * The pending-results buffer is cleared afterwards so prepare/execute stays idempotent.
 */
protected def waitForSubqueries(): Unit = {
  // Fill in the result of every pending subquery, in submission order.
  for ((subquery, pending) <- queryResults) {
    // executeTake(2) was used upstream, so length > 1 means "at least two rows".
    val rows = Await.result(pending, Duration.Inf)
    if (rows.length > 1) {
      sys.error(s"more than one row returned by a subquery used as an expression:\n${subquery.plan}")
    }
    if (rows.length == 1) {
      assert(rows(0).numFields == 1, "Analyzer should make sure this only returns one column")
      subquery.updateResult(rows(0).get(0, subquery.dataType))
    } else {
      // No rows returned: the scalar subquery evaluates to null.
      subquery.updateResult(null)
    }
  }
  queryResults.clear()
}
/**
 * Prepare a SparkPlan for execution. It's idempotent.
 *
 * Runs `doPrepare()` for this node, kicks off background execution of any scalar
 * subqueries, and then recursively prepares all children. The atomic compareAndSet
 * guarantees the body runs at most once even if prepare() is called concurrently
 * or repeatedly.
 */
final def prepare(): Unit = {
  // compareAndSet(false, true) wins exactly once; later calls are no-ops.
  if (prepareCalled.compareAndSet(false, true)) {
    doPrepare()
    // Submit subquery jobs to run in the background; execute() waits on them
    // via waitForSubqueries() before doExecute().
    prepareSubqueries()
    children.foreach(_.prepare())
  }
}
0 commit comments