
Commit 59eaca9

[SPARK-48961][PYTHON] Make the parameter naming of PySparkException consistent with JVM
### What changes were proposed in this pull request?

This PR proposes to make the parameter naming of `PySparkException` consistent with JVM.

### Why are the changes needed?

The parameter names of `PySparkException` are different from `SparkException`, so there is an inconsistency when searching those parameters from error logs.

SparkException: https://github.com/apache/spark/blob/6508b1f5e18731359354af0a7bcc1382bc4f356b/common/utils/src/main/scala/org/apache/spark/SparkException.scala#L27-L33

PySparkException: https://github.com/apache/spark/blob/6508b1f5e18731359354af0a7bcc1382bc4f356b/python/pyspark/errors/exceptions/base.py#L29-L40

### Does this PR introduce _any_ user-facing change?

The error parameter names are changed from:

- `error_class` -> `errorClass`
- `message_parameters` -> `messageParameters`
- `query_contexts` -> `context`

### How was this patch tested?

The existing CI should pass.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#47436 from itholic/SPARK-48961.

Authored-by: Haejoon Lee <haejoon.lee@databricks.com>
Signed-off-by: Haejoon Lee <haejoon.lee@databricks.com>
1 parent ba721bc commit 59eaca9
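
For illustration only (not part of the commit), a minimal sketch of a construction site using the renamed keyword arguments; the error class and parameter values below are placeholders, and `message` is passed explicitly so no error-class lookup is involved:

```python
from pyspark.errors import PySparkException

# Hypothetical example: the camelCase keywords replace the old
# error_class= / message_parameters= / query_contexts= keywords.
exc = PySparkException(
    message="[DIVIDE_BY_ZERO] Division by zero.",  # explicit message, no lookup
    errorClass="DIVIDE_BY_ZERO",
    messageParameters={"config": "spark.sql.ansi.enabled"},
)

# The public accessors keep their existing names.
assert exc.getErrorClass() == "DIVIDE_BY_ZERO"
assert exc.getMessageParameters() == {"config": "spark.sql.ansi.enabled"}
assert exc.getQueryContext() == []  # contexts default to an empty list
```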

File tree

119 files changed (+2066 −2070 lines)


python/docs/source/development/contributing.rst
Lines changed: 2 additions & 2 deletions

@@ -279,8 +279,8 @@ Throw with arbitrary error message:
 .. code-block:: python

     class PySparkTestError(PySparkException):
-        def __init__(self, error_class: str, message_parameters: Dict[str, str]):
-            super().__init__(error_class=error_class, message_parameters=message_parameters)
+        def __init__(self, errorClass: str, messageParameters: Dict[str, str]):
+            super().__init__(errorClass=errorClass, messageParameters=messageParameters)

         def getMessageParameters(self) -> Optional[Dict[str, str]]:
             return super().getMessageParameters()
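
As a usage sketch for the doc example above (not part of the commit), the subclass can be instantiated with an existing error class; `VALUE_NOT_ACCESSIBLE` and its `value` parameter are borrowed from the `accumulators.py` hunk further below, so the message lookup should resolve:

```python
from typing import Dict, Optional

from pyspark.errors import PySparkException


class PySparkTestError(PySparkException):
    def __init__(self, errorClass: str, messageParameters: Dict[str, str]):
        super().__init__(errorClass=errorClass, messageParameters=messageParameters)

    def getMessageParameters(self) -> Optional[Dict[str, str]]:
        return super().getMessageParameters()


# Hypothetical usage; the error class and parameter come from call sites
# elsewhere in this diff.
err = PySparkTestError("VALUE_NOT_ACCESSIBLE", {"value": "Accumulator.value"})
print(err.getErrorClass())         # VALUE_NOT_ACCESSIBLE
print(err.getMessageParameters())  # {'value': 'Accumulator.value'}
```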

python/docs/source/development/logger.rst
Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ Example log entry:
       "file": "/path/to/file.py",
       "line": "17",
       "fragment": "divide"
-      "error_class": "DIVIDE_BY_ZERO"
+      "errorClass": "DIVIDE_BY_ZERO"
     },
     "exception": {
       "class": "Py4JJavaError",

python/pyspark/accumulators.py
Lines changed: 4 additions & 4 deletions

@@ -149,8 +149,8 @@ def value(self) -> T:
         """Get the accumulator's value; only usable in driver program"""
         if self._deserialized:
             raise PySparkRuntimeError(
-                error_class="VALUE_NOT_ACCESSIBLE",
-                message_parameters={
+                errorClass="VALUE_NOT_ACCESSIBLE",
+                messageParameters={
                     "value": "Accumulator.value",
                 },
             )
@@ -161,8 +161,8 @@ def value(self, value: T) -> None:
         """Sets the accumulator's value; only usable in driver program"""
         if self._deserialized:
             raise PySparkRuntimeError(
-                error_class="VALUE_NOT_ACCESSIBLE",
-                message_parameters={
+                errorClass="VALUE_NOT_ACCESSIBLE",
+                messageParameters={
                     "value": "Accumulator.value",
                 },
             )

python/pyspark/conf.py
Lines changed: 2 additions & 2 deletions

@@ -188,8 +188,8 @@ def setExecutorEnv(
         """Set an environment variable to be passed to executors."""
         if (key is not None and pairs is not None) or (key is None and pairs is None):
             raise PySparkRuntimeError(
-                error_class="KEY_VALUE_PAIR_REQUIRED",
-                message_parameters={},
+                errorClass="KEY_VALUE_PAIR_REQUIRED",
+                messageParameters={},
             )
         elif key is not None:
             self.set("spark.executorEnv.{}".format(key), cast(str, value))

python/pyspark/core/broadcast.py
Lines changed: 8 additions & 8 deletions

@@ -59,8 +59,8 @@ def _from_id(bid: int) -> "Broadcast[Any]":

     if bid not in _broadcastRegistry:
         raise PySparkRuntimeError(
-            error_class="BROADCAST_VARIABLE_NOT_LOADED",
-            message_parameters={
+            errorClass="BROADCAST_VARIABLE_NOT_LOADED",
+            messageParameters={
                 "variable": str(bid),
             },
         )
@@ -299,8 +299,8 @@ def unpersist(self, blocking: bool = False) -> None:
         """
         if self._jbroadcast is None:
             raise PySparkRuntimeError(
-                error_class="INVALID_BROADCAST_OPERATION",
-                message_parameters={"operation": "unpersisted"},
+                errorClass="INVALID_BROADCAST_OPERATION",
+                messageParameters={"operation": "unpersisted"},
             )
         self._jbroadcast.unpersist(blocking)
@@ -329,17 +329,17 @@ def destroy(self, blocking: bool = False) -> None:
         """
         if self._jbroadcast is None:
             raise PySparkRuntimeError(
-                error_class="INVALID_BROADCAST_OPERATION",
-                message_parameters={"operation": "destroyed"},
+                errorClass="INVALID_BROADCAST_OPERATION",
+                messageParameters={"operation": "destroyed"},
             )
         self._jbroadcast.destroy(blocking)
         os.unlink(self._path)

     def __reduce__(self) -> Tuple[Callable[[int], "Broadcast[T]"], Tuple[int]]:
         if self._jbroadcast is None:
             raise PySparkRuntimeError(
-                error_class="INVALID_BROADCAST_OPERATION",
-                message_parameters={"operation": "serialized"},
+                errorClass="INVALID_BROADCAST_OPERATION",
+                messageParameters={"operation": "serialized"},
             )
         assert self._pickle_registry is not None
         self._pickle_registry.add(self)

python/pyspark/core/context.py
Lines changed: 14 additions & 14 deletions

@@ -184,8 +184,8 @@ def __init__(
     ):
         if "SPARK_CONNECT_MODE_ENABLED" in os.environ and "SPARK_LOCAL_REMOTE" not in os.environ:
             raise PySparkRuntimeError(
-                error_class="CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT",
-                message_parameters={},
+                errorClass="CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT",
+                messageParameters={},
             )

         if conf is None or conf.get("spark.executor.allowSparkContext", "false").lower() != "true":
@@ -271,13 +271,13 @@ def _do_init(
         # Check that we have at least the required parameters
         if not self._conf.contains("spark.master"):
             raise PySparkRuntimeError(
-                error_class="MASTER_URL_NOT_SET",
-                message_parameters={},
+                errorClass="MASTER_URL_NOT_SET",
+                messageParameters={},
             )
         if not self._conf.contains("spark.app.name"):
             raise PySparkRuntimeError(
-                error_class="APPLICATION_NAME_NOT_SET",
-                message_parameters={},
+                errorClass="APPLICATION_NAME_NOT_SET",
+                messageParameters={},
             )

         # Read back our properties from the conf in case we loaded some of them from
@@ -465,8 +465,8 @@ def _ensure_initialized(
     def __getnewargs__(self) -> NoReturn:
         # This method is called when attempting to pickle SparkContext, which is always an error:
         raise PySparkRuntimeError(
-            error_class="CONTEXT_ONLY_VALID_ON_DRIVER",
-            message_parameters={},
+            errorClass="CONTEXT_ONLY_VALID_ON_DRIVER",
+            messageParameters={},
         )

     def __enter__(self) -> "SparkContext":
@@ -2535,8 +2535,8 @@ def show_profiles(self) -> None:
             self.profiler_collector.show_profiles()
         else:
             raise PySparkRuntimeError(
-                error_class="INCORRECT_CONF_FOR_PROFILE",
-                message_parameters={},
+                errorClass="INCORRECT_CONF_FOR_PROFILE",
+                messageParameters={},
             )

     def dump_profiles(self, path: str) -> None:
@@ -2552,8 +2552,8 @@ def dump_profiles(self, path: str) -> None:
             self.profiler_collector.dump_profiles(path)
         else:
             raise PySparkRuntimeError(
-                error_class="INCORRECT_CONF_FOR_PROFILE",
-                message_parameters={},
+                errorClass="INCORRECT_CONF_FOR_PROFILE",
+                messageParameters={},
             )

     def getConf(self) -> SparkConf:
@@ -2591,8 +2591,8 @@ def _assert_on_driver() -> None:
     """
     if TaskContext.get() is not None:
         raise PySparkRuntimeError(
-            error_class="CONTEXT_ONLY_VALID_ON_DRIVER",
-            message_parameters={},
+            errorClass="CONTEXT_ONLY_VALID_ON_DRIVER",
+            messageParameters={},
         )

python/pyspark/core/rdd.py
Lines changed: 8 additions & 8 deletions

@@ -137,8 +137,8 @@ def portable_hash(x: Hashable) -> int:

     if "PYTHONHASHSEED" not in os.environ:
         raise PySparkRuntimeError(
-            error_class="PYTHON_HASH_SEED_NOT_SET",
-            message_parameters={},
+            errorClass="PYTHON_HASH_SEED_NOT_SET",
+            messageParameters={},
         )

     if x is None:
@@ -246,8 +246,8 @@ def __repr__(self) -> str:
     def __getnewargs__(self) -> NoReturn:
         # This method is called when attempting to pickle an RDD, which is always an error:
         raise PySparkRuntimeError(
-            error_class="RDD_TRANSFORM_ONLY_VALID_ON_DRIVER",
-            message_parameters={},
+            errorClass="RDD_TRANSFORM_ONLY_VALID_ON_DRIVER",
+            messageParameters={},
         )

     @property
@@ -1581,8 +1581,8 @@ def check_return_code() -> Iterable[int]:
                 pipe.wait()
                 if checkCode and pipe.returncode:
                     raise PySparkRuntimeError(
-                        error_class="PIPE_FUNCTION_EXITED",
-                        message_parameters={
+                        errorClass="PIPE_FUNCTION_EXITED",
+                        messageParameters={
                             "func_name": command,
                             "error_code": str(pipe.returncode),
                         },
@@ -5105,8 +5105,8 @@ def toDF(
         self: "RDD[Any]", schema: Optional[Any] = None, sampleRatio: Optional[float] = None
     ) -> "DataFrame":
         raise PySparkRuntimeError(
-            error_class="CALL_BEFORE_INITIALIZE",
-            message_parameters={
+            errorClass="CALL_BEFORE_INITIALIZE",
+            messageParameters={
                 "func_name": "RDD.toDF",
                 "object": "SparkSession",
             },

python/pyspark/errors/exceptions/base.py
Lines changed: 23 additions & 23 deletions

@@ -34,24 +34,24 @@ class PySparkException(Exception):
     def __init__(
         self,
         message: Optional[str] = None,
-        error_class: Optional[str] = None,
-        message_parameters: Optional[Dict[str, str]] = None,
-        query_contexts: Optional[List["QueryContext"]] = None,
+        errorClass: Optional[str] = None,
+        messageParameters: Optional[Dict[str, str]] = None,
+        contexts: Optional[List["QueryContext"]] = None,
     ):
-        if query_contexts is None:
-            query_contexts = []
+        if contexts is None:
+            contexts = []
         self._error_reader = ErrorClassesReader()

         if message is None:
             self._message = self._error_reader.get_error_message(
-                cast(str, error_class), cast(Dict[str, str], message_parameters)
+                cast(str, errorClass), cast(Dict[str, str], messageParameters)
             )
         else:
             self._message = message

-        self._error_class = error_class
-        self._message_parameters = message_parameters
-        self._query_contexts = query_contexts
+        self._errorClass = errorClass
+        self._messageParameters = messageParameters
+        self._contexts = contexts

     def getErrorClass(self) -> Optional[str]:
         """
@@ -66,7 +66,7 @@ def getErrorClass(self) -> Optional[str]:
         :meth:`PySparkException.getQueryContext`
         :meth:`PySparkException.getSqlState`
         """
-        return self._error_class
+        return self._errorClass

     def getMessageParameters(self) -> Optional[Dict[str, str]]:
         """
@@ -81,7 +81,7 @@ def getMessageParameters(self) -> Optional[Dict[str, str]]:
         :meth:`PySparkException.getQueryContext`
         :meth:`PySparkException.getSqlState`
         """
-        return self._message_parameters
+        return self._messageParameters

     def getSqlState(self) -> Optional[str]:
         """
@@ -128,28 +128,28 @@ def getQueryContext(self) -> List["QueryContext"]:
         :meth:`PySparkException.getMessage`
         :meth:`PySparkException.getSqlState`
         """
-        return self._query_contexts
+        return self._contexts

     def _log_exception(self) -> None:
-        query_contexts = self.getQueryContext()
-        query_context = query_contexts[0] if len(query_contexts) != 0 else None
-        if query_context:
-            if query_context.contextType().name == "DataFrame":
+        contexts = self.getQueryContext()
+        context = contexts[0] if len(contexts) != 0 else None
+        if context:
+            if context.contextType().name == "DataFrame":
                 logger = PySparkLogger.getLogger("DataFrameQueryContextLogger")
-                call_site = query_context.callSite().split(":")
+                call_site = context.callSite().split(":")
                 line = call_site[1] if len(call_site) == 2 else ""
                 logger.exception(
                     self.getMessage(),
                     file=call_site[0],
                     line=line,
-                    fragment=query_context.fragment(),
-                    error_class=self.getErrorClass(),
+                    fragment=context.fragment(),
+                    errorClass=self.getErrorClass(),
                 )
             else:
                 logger = PySparkLogger.getLogger("SQLQueryContextLogger")
                 logger.exception(
                     self.getMessage(),
-                    error_class=self.getErrorClass(),
+                    errorClass=self.getErrorClass(),
                 )

     def __str__(self) -> str:
@@ -299,11 +299,11 @@ class PySparkAssertionError(PySparkException, AssertionError):
     def __init__(
         self,
         message: Optional[str] = None,
-        error_class: Optional[str] = None,
-        message_parameters: Optional[Dict[str, str]] = None,
+        errorClass: Optional[str] = None,
+        messageParameters: Optional[Dict[str, str]] = None,
         data: Optional[Iterable["Row"]] = None,
     ):
-        super().__init__(message, error_class, message_parameters)
+        super().__init__(message, errorClass, messageParameters)
         self.data = data
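
Because only the keyword names changed, positional call sites are unaffected, while keyword call sites must move to the camelCase names. A small sketch of the consequence (not from the commit; the literal message string is a placeholder, not the real error template):

```python
from pyspark.errors import PySparkRuntimeError

# The old snake_case keywords are no longer accepted after this change.
try:
    PySparkRuntimeError(error_class="MASTER_URL_NOT_SET", message_parameters={})
except TypeError as e:
    print(e)  # ... got an unexpected keyword argument 'error_class'

# camelCase keywords (and purely positional calls) work as before.
err = PySparkRuntimeError(
    message="A master URL must be set in your configuration.",
    errorClass="MASTER_URL_NOT_SET",
    messageParameters={},
)
assert err.getErrorClass() == "MASTER_URL_NOT_SET"
```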

python/pyspark/errors/exceptions/captured.py
Lines changed: 3 additions & 3 deletions

@@ -146,11 +146,11 @@ def getMessage(self) -> str:
         if self._origin is not None and is_instance_of(
             gw, self._origin, "org.apache.spark.SparkThrowable"
         ):
-            error_class = self._origin.getErrorClass()
-            message_parameters = self._origin.getMessageParameters()
+            errorClass = self._origin.getErrorClass()
+            messageParameters = self._origin.getMessageParameters()

             error_message = gw.jvm.org.apache.spark.SparkThrowableHelper.getMessage(
-                error_class, message_parameters
+                errorClass, messageParameters
             )

             return error_message
