
[SPARK-47274][PYTHON][SQL] Provide more useful context for PySpark DataFrame API errors #45377

Status: Closed · 24 commits

Commits
2c1d5d8 Prototype (itholic, Mar 4, 2024)
376fc46 Merge branch 'master' of https://github.com/apache/spark into error_c… (itholic, Apr 1, 2024)
174a929 Merge branch 'master' of https://github.com/apache/spark into error_c… (itholic, Apr 2, 2024)
8ab1edf Support query context testing and added UTs (itholic, Apr 2, 2024)
5906852 Merge branch 'master' of https://github.com/apache/spark into error_c… (itholic, Apr 3, 2024)
f3a7bd4 resolve comments (itholic, Apr 3, 2024)
bbaa399 Add JIRA pointer for testing (itholic, Apr 3, 2024)
b9f54f1 Silence the linter (itholic, Apr 3, 2024)
c8d98ea Adjusted comments (itholic, Apr 3, 2024)
ef7f1df Merge branch 'master' of https://github.com/apache/spark into error_c… (itholic, Apr 4, 2024)
cc52aab Update displayed string and add comment for PySparkCurrentOrigin (itholic, Apr 5, 2024)
9c323d4 Using queue to ensure multiple call sites can be logged in order and … (itholic, Apr 5, 2024)
f5ad1c4 remove unnecessary comment (itholic, Apr 5, 2024)
4f12dc7 Extends Origin and WithOrigin to PySpark context support (itholic, Apr 8, 2024)
001c71e Reusing fn for PySpark logging (itholic, Apr 9, 2024)
daa08cd Add document for extended PySpark specific logging functions (itholic, Apr 9, 2024)
92faffe remove unused code (itholic, Apr 9, 2024)
2514afb Merge branch 'master' of https://github.com/apache/spark into error_c… (itholic, Apr 9, 2024)
672c176 Address None properly (itholic, Apr 9, 2024)
1304c2b Simplifying (itholic, Apr 9, 2024)
ff4037b Merge branch 'master' of https://github.com/apache/spark into error_c… (itholic, Apr 10, 2024)
1d8df34 Respect spark.sql.stackTracesInDataFrameContext (itholic, Apr 10, 2024)
95f7848 Add captureStackTrace to remove duplication (itholic, Apr 10, 2024)
1dd53ed pysparkLoggingInfo -> pysparkErrorContext (itholic, Apr 10, 2024)
8 changes: 8 additions & 0 deletions python/pyspark/errors/exceptions/captured.py
@@ -409,5 +409,13 @@ def fragment(self) -> str:
    def callSite(self) -> str:
        return str(self._q.callSite())

    def pysparkFragment(self) -> Optional[str]:  # type: ignore[return]
        if self.contextType() == QueryContextType.DataFrame:
            return str(self._q.pysparkFragment())

    def pysparkCallSite(self) -> Optional[str]:  # type: ignore[return]
        if self.contextType() == QueryContextType.DataFrame:
            return str(self._q.pysparkCallSite())

    def summary(self) -> str:
        return str(self._q.summary())
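
For orientation, a minimal sketch of how these accessors might be consumed from user code, assuming the Spark 4.0 error API (`getQueryContext()` on `CapturedException`); the ANSI division-by-zero trigger and the printed values are illustrative assumptions, not output taken from this PR:

from pyspark.sql import SparkSession
from pyspark.errors.exceptions.base import QueryContextType
from pyspark.errors.exceptions.captured import CapturedException

spark = SparkSession.builder.getOrCreate()
spark.conf.set("spark.sql.ansi.enabled", "true")  # make the division below fail at runtime
df = spark.range(1)

try:
    df.withColumn("bad", df.id / 0).collect()
except CapturedException as e:
    for ctx in e.getQueryContext():
        # Only DataFrame contexts carry the PySpark-specific fields added above;
        # both accessors return None for SQL query contexts.
        if ctx.contextType() == QueryContextType.DataFrame:
            print(ctx.pysparkFragment())  # e.g. "divide" (illustrative)
            print(ctx.pysparkCallSite())  # e.g. "/path/to/app.py:11" (illustrative)
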
37 changes: 36 additions & 1 deletion python/pyspark/sql/column.py
@@ -18,6 +18,7 @@
import sys
import json
import warnings
import inspect
from typing import (
    cast,
    overload,
@@ -174,16 +175,50 @@ def _bin_op(
["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"
]:
"""Create a method for given binary operator"""
binary_operator_map = {
"plus": "+",
"minus": "-",
"divide": "/",
"multiply": "*",
"mod": "%",
"equalTo": "=",
"lt": "<",
"leq": "<=",
"geq": ">=",
"gt": ">",
"eqNullSafe": "<=>",
"bitwiseOR": "|",
"bitwiseAND": "&",
"bitwiseXOR": "^",
# Just following JVM rule even if the names of source and target are the same.
"and": "and",
"or": "or",
}

def _(
self: "Column",
other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
) -> "Column":
jc = other._jc if isinstance(other, Column) else other
njc = getattr(self._jc, name)(jc)
if name in binary_operator_map:
from pyspark.sql import SparkSession

spark = SparkSession._getActiveSessionOrCreate()
stack = list(reversed(inspect.stack()))
depth = int(
spark.conf.get("spark.sql.stackTracesInDataFrameContext") # type: ignore[arg-type]
)
selected_frames = stack[:depth]
call_sites = [f"{frame.filename}:{frame.lineno}" for frame in selected_frames]
call_site_str = "\n".join(call_sites)

njc = getattr(self._jc, "fn")(binary_operator_map[name], jc, name, call_site_str)
else:
njc = getattr(self._jc, name)(jc)
return Column(njc)

_.__doc__ = doc
_.__name__ = name
return _
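
As a standalone illustration of the call-site capture inside `_` above, this hedged sketch reruns the same `inspect.stack()` pattern outside Spark; `capture_call_sites` and `user_entry_point` are hypothetical names, and the hard-coded depth stands in for the `spark.sql.stackTracesInDataFrameContext` value the diff reads from the session conf:

import inspect

def capture_call_sites(depth: int) -> str:
    # As in the diff: reverse the stack so the outermost (user-facing) frame
    # comes first, then keep only `depth` frames.
    stack = list(reversed(inspect.stack()))
    return "\n".join(f"{frame.filename}:{frame.lineno}" for frame in stack[:depth])

def user_entry_point() -> str:
    return capture_call_sites(depth=2)

print(user_entry_point())  # two "<file>:<line>" entries, outermost call first
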


4 changes: 4 additions & 0 deletions python/pyspark/sql/tests/connect/test_parity_dataframe.py
@@ -30,6 +30,10 @@ def test_help_command(self):
    def test_toDF_with_schema_string(self):
        super().test_toDF_with_schema_string()

    @unittest.skip("Spark Connect does not support DataFrameQueryContext currently.")
    def test_dataframe_error_context(self):
        super().test_dataframe_error_context()


if __name__ == "__main__":
    import unittest
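
For context on the parity pattern used in that test file, a minimal hedged sketch: the Connect suite inherits every classic test and opts out per method, so unsupported features stay visible as skips in test reports instead of silently disappearing (class and method bodies below are illustrative, not copied from the PR):

import unittest

class DataFrameTestsMixin:
    def test_dataframe_error_context(self):
        ...  # exercises DataFrameQueryContext against classic Spark (hypothetical body)

class DataFrameParityTests(DataFrameTestsMixin, unittest.TestCase):
    # Inherit every test; skip only the ones Spark Connect cannot run yet.
    @unittest.skip("Spark Connect does not support DataFrameQueryContext currently.")
    def test_dataframe_error_context(self):
        super().test_dataframe_error_context()

if __name__ == "__main__":
    unittest.main()
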