From 78b83fa542a513efbfadf8e03358a4411175bbf3 Mon Sep 17 00:00:00 2001 From: Takuya Ueshin Date: Thu, 25 Jul 2024 13:47:01 -0700 Subject: [PATCH] [SPARK-48996][SQL][PYTHON] Allow bare literals for __and__ and __or__ of Column ### What changes were proposed in this pull request? Allows bare literals for `__and__` and `__or__` of Column API in Spark Classic. ### Why are the changes needed? Currently bare literals are not allowed for `__and__` and `__or__` of Column API in Spark Classic and need to be wrapped with the `lit()` function. It should be allowed, similar to other operators. ```py >>> from pyspark.sql.functions import * >>> c = col("c") >>> c & True Traceback (most recent call last): ... py4j.Py4JException: Method and([class java.lang.Boolean]) does not exist >>> c & lit(True) Column<'and(c, true)'> ``` whereas other operators: ```py >>> c + 1 Column<'`+`(c, 1)'> >>> c + lit(1) Column<'`+`(c, 1)'> ``` Spark Connect allows this. ```py >>> c & True Column<'and(c, True)'> >>> c & lit(True) Column<'and(c, True)'> ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Added the related tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47474 from ueshin/issues/SPARK-48996/literal_and_or. 
Authored-by: Takuya Ueshin Signed-off-by: Takuya Ueshin --- python/pyspark/sql/classic/column.py | 16 ++++++++++++---- python/pyspark/sql/tests/test_column.py | 8 ++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/classic/column.py b/python/pyspark/sql/classic/column.py index c893050872f4e..92c57438c739c 100644 --- a/python/pyspark/sql/classic/column.py +++ b/python/pyspark/sql/classic/column.py @@ -309,12 +309,16 @@ def eqNullSafe( def __and__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("and", self, other) + from pyspark.sql.functions import lit + + return _bin_op("and", self, lit(other)) def __or__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("or", self, other) + from pyspark.sql.functions import lit + + return _bin_op("or", self, lit(other)) def __invert__(self) -> ParentColumn: return _func_op("not", self) @@ -322,12 +326,16 @@ def __invert__(self) -> ParentColumn: def __rand__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("and", self, other) + from pyspark.sql.functions import lit + + return _bin_op("and", self, lit(other)) def __ror__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("or", self, other) + from pyspark.sql.functions import lit + + return _bin_op("or", self, lit(other)) # container operators def __contains__(self, item: Any) -> None: diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index ac599fab13578..ecfcae36c955a 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -94,6 +94,14 @@ def test_column_operators(self): cs.startswith("a"), cs.endswith("a"), ci.eqNullSafe(cs), + sf.col("b") & sf.lit(True), + sf.col("b") 
& True, + sf.lit(True) & sf.col("b"), + True & sf.col("b"), + sf.col("b") | sf.lit(True), + sf.col("b") | True, + sf.lit(True) | sf.col("b"), + True | sf.col("b"), ) self.assertTrue(all(isinstance(c, Column) for c in css)) self.assertTrue(isinstance(ci.cast(LongType()), Column))