[SPARK-48843] Prevent infinite loop with BindParameters

nemanja-boric-databricks · cloud-fan · cloud-fan · commit 1e15e3f3eb2b · 2024-07-10T20:39:17.000+08:00
### What changes were proposed in this pull request? In order to resolve the named parameters in a subtree, BindParameters recurses into the subtrees and tries to match the pattern with the named parameters. If there is no named parameter at the current level, the rule tries to return the unchanged plan. However, instead of returning the current plan object, the rule always returns the captured root plan node, leading to infinite recursion. ### Why are the changes needed? To fix the infinite recursion that occurs with named parameters and a global limit. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47271 from nemanja-boric-databricks/fix-bind. Lead-authored-by: Nemanja Boric <nemanja.boric@databricks.com> Co-authored-by: Wenchen Fan <cloud0fan@gmail.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> (cherry picked from commit a39f70d) Signed-off-by: Wenchen Fan <wenchen@databricks.com>
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -1242,6 +1242,13 @@ def test_sql_with_named_args(self):
         df2 = self.spark.sql("SELECT * FROM range(10) WHERE id > :minId", args={"minId": 7})
         self.assert_eq(df.toPandas(), df2.toPandas())
 
+    def test_namedargs_with_global_limit(self):
+        sqlText = """SELECT * FROM VALUES (TIMESTAMP('2022-12-25 10:30:00'), 1) as tab(date, val)
+         where val = :val"""
+        df = self.connect.sql(sqlText, args={"val": 1})
+        df2 = self.spark.sql(sqlText, args={"val": 1})
+        self.assert_eq(df.toPandas(), df2.toPandas())
+
     def test_sql_with_pos_args(self):
         df = self.connect.sql("SELECT * FROM range(10) WHERE id > ?", args=[7])
         df2 = self.spark.sql("SELECT * FROM range(10) WHERE id > ?", args=[7])
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala
@@ -136,7 +136,7 @@ object BindParameters extends Rule[LogicalPlan] with QueryErrorsBase {
             args(posToIndex(pos))
         }
 
-      case _ => plan
+      case other => other
     }
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -136,7 +136,7 @@ object BindParameters extends Rule[LogicalPlan] with QueryErrorsBase {`
`136`	`136`	`args(posToIndex(pos))`
`137`	`137`	`}`
`138`	`138`
`139`		`- case _ => plan`
	`139`	`+ case other => other`
`140`	`140`	`}`
`141`	`141`	`}`
`142`	`142`	`}`