From 78b83fa542a513efbfadf8e03358a4411175bbf3 Mon Sep 17 00:00:00 2001 From: Takuya Ueshin Date: Thu, 25 Jul 2024 13:47:01 -0700 Subject: [PATCH] [SPARK-48996][SQL][PYTHON] Allow bare literals for __and__ and __or__ of Column ### What changes were proposed in this pull request? Allows bare literals for `__and__` and `__or__` of Column API in Spark Classic. ### Why are the changes needed? Currently bare literals are not allowed for `__and__` and `__or__` of Column API in Spark Classic and need to be wrapped with the `lit()` function. It should be allowed, similar to other operators. ```py >>> from pyspark.sql.functions import * >>> c = col("c") >>> c & True Traceback (most recent call last): ... py4j.Py4JException: Method and([class java.lang.Boolean]) does not exist >>> c & lit(True) Column<'and(c, true)'> ``` whereas other operators: ```py >>> c + 1 Column<'`+`(c, 1)'> >>> c + lit(1) Column<'`+`(c, 1)'> ``` Spark Connect allows this. ```py >>> c & True Column<'and(c, True)'> >>> c & lit(True) Column<'and(c, True)'> ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Added the related tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47474 from ueshin/issues/SPARK-48996/literal_and_or. 
Authored-by: Takuya Ueshin Signed-off-by: Takuya Ueshin --- python/pyspark/sql/classic/column.py | 16 ++++++++++++---- python/pyspark/sql/tests/test_column.py | 8 ++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/classic/column.py b/python/pyspark/sql/classic/column.py index c893050872f4e..92c57438c739c 100644 --- a/python/pyspark/sql/classic/column.py +++ b/python/pyspark/sql/classic/column.py @@ -309,12 +309,16 @@ def eqNullSafe( def __and__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("and", self, other) + from pyspark.sql.functions import lit + + return _bin_op("and", self, lit(other)) def __or__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("or", self, other) + from pyspark.sql.functions import lit + + return _bin_op("or", self, lit(other)) def __invert__(self) -> ParentColumn: return _func_op("not", self) @@ -322,12 +326,16 @@ def __invert__(self) -> ParentColumn: def __rand__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("and", self, other) + from pyspark.sql.functions import lit + + return _bin_op("and", self, lit(other)) def __ror__( self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] ) -> ParentColumn: - return _bin_op("or", self, other) + from pyspark.sql.functions import lit + + return _bin_op("or", self, lit(other)) # container operators def __contains__(self, item: Any) -> None: diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index ac599fab13578..ecfcae36c955a 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -94,6 +94,14 @@ def test_column_operators(self): cs.startswith("a"), cs.endswith("a"), ci.eqNullSafe(cs), + sf.col("b") & sf.lit(True), + sf.col("b") 
& True, + sf.lit(True) & sf.col("b"), + True & sf.col("b"), + sf.col("b") | sf.lit(True), + sf.col("b") | True, + sf.lit(True) | sf.col("b"), + True | sf.col("b"), ) self.assertTrue(all(isinstance(c, Column) for c in css)) self.assertTrue(isinstance(ci.cast(LongType()), Column))