Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions python/pyspark/pandas/data_type_ops/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import numbers
from abc import ABCMeta
from typing import Any, Optional, Union
from typing import Any, Optional, Union, cast
from itertools import chain

import numpy as np
Expand Down Expand Up @@ -53,7 +53,6 @@
handle_dtype_as_extension_dtype,
spark_type_to_pandas_dtype,
)
from pyspark.pandas.utils import is_ansi_mode_enabled

if extension_dtypes_available:
from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype
Expand Down Expand Up @@ -424,9 +423,14 @@ def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError(">= can not be applied to %s." % self.pretty_name)

def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
if is_ansi_mode_enabled(left._internal.spark_frame.sparkSession):
if _should_return_all_false(left, right):
return left._with_new_scol(F.lit(False)).rename(None) # type: ignore[attr-defined]
from pyspark.pandas.base import IndexOpsMixin

if _should_return_all_false(left, right):
left_scol = left._with_new_scol(F.lit(False))
if isinstance(right, IndexOpsMixin):
return left_scol.rename(None) # type: ignore[attr-defined]
else:
return cast(SeriesOrIndex, left_scol)

if isinstance(right, (list, tuple)):
from pyspark.pandas.series import first_series, scol_for
Expand Down Expand Up @@ -521,10 +525,17 @@ def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
return column_op(PySparkColumn.__eq__)(left, right)

def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Return the element-wise ``left != right`` comparison.

    ``right`` is first checked with ``_sanitize_list_like``. When
    ``_should_return_all_false(left, right)`` holds (the condition under
    which ``eq`` yields an all-False column), inequality is True for every
    row, so a literal-True column is returned directly. Otherwise the
    comparison is delegated to ``PySparkColumn.__ne__`` through
    ``column_op``.
    """
    from pyspark.pandas.base import IndexOpsMixin, column_op

    _sanitize_list_like(right)

    # Common path: build the regular Spark column comparison.
    if not _should_return_all_false(left, right):
        return column_op(PySparkColumn.__ne__)(left, right)

    all_true = left._with_new_scol(F.lit(True))
    if isinstance(right, IndexOpsMixin):
        # Series/Index operand: the result carries no name.
        return all_true.rename(None)  # type: ignore[attr-defined]
    # Any other operand: the result keeps the name of ``left``.
    return cast(SeriesOrIndex, all_true)

def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
Expand Down
24 changes: 15 additions & 9 deletions python/pyspark/pandas/data_type_ops/num_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,16 @@ def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
if not isinstance(right, IndexOpsMixin) and is_list_like(right):
return super().eq(left, right)
else:
if _should_return_all_false(left, right):
left_scol = left._with_new_scol(F.lit(False))
if isinstance(right, IndexOpsMixin):
# When comparing with another Series/Index, drop the name
# to align with pandas behavior
return left_scol.rename(None) # type: ignore[attr-defined]
else:
# When comparing with scalar-like, keep the name of left operand
return cast(SeriesOrIndex, left_scol)
if is_ansi_mode_enabled(left._internal.spark_frame.sparkSession):
if _should_return_all_false(left, right):
left_scol = left._with_new_scol(F.lit(False))
if isinstance(right, IndexOpsMixin):
# When comparing with another Series/Index, drop the name
# to align with pandas behavior
return left_scol.rename(None) # type: ignore[attr-defined]
else:
# When comparing with scalar-like, keep the name of left operand
return cast(SeriesOrIndex, left_scol)
if _is_boolean_type(right): # numeric vs. bool
right = transform_boolean_operand_to_numeric(
right, spark_type=left.spark.data_type
Expand All @@ -294,6 +294,12 @@ def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:

def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Return the element-wise ``left != right`` comparison for numeric data.

    ``right`` is first checked with ``_sanitize_list_like``. When
    ``_should_return_all_false(left, right)`` holds, a literal-True column
    is returned without building the comparison. Otherwise the work is
    delegated to ``pyspark_column_op("__ne__", ..., fillna=True)``.
    """
    _sanitize_list_like(right)

    # Common path: regular column-wise inequality.
    if not _should_return_all_false(left, right):
        return pyspark_column_op("__ne__", left, right, fillna=True)

    all_true = left._with_new_scol(F.lit(True))
    if isinstance(right, IndexOpsMixin):
        # Series/Index operand: drop the name to align with pandas behavior.
        return all_true.rename(None)  # type: ignore[attr-defined]
    # Scalar-like operand: keep the name of the left operand.
    return cast(SeriesOrIndex, all_true)

def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
Expand Down
6 changes: 6 additions & 0 deletions python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,19 @@ def test_eq(self):
psser, other_psser = psdf["this"], psdf["that"]
self.assert_eq(pser == other_pser, psser == other_psser)
self.assert_eq(pser == pser, psser == psser)
# SPARK-54665: boolean vs string comparison should match pandas behavior
self.assert_eq(pser == "True", psser == "True")
self.assert_eq(pser == "False", psser == "False")

def test_ne(self):
    """Check that ``!=`` on the boolean fixture matches pandas."""
    pandas_frame, ps_frame = self.bool_pdf, self.bool_psdf
    pser, other_pser = pandas_frame["this"], pandas_frame["that"]
    psser, other_psser = ps_frame["this"], ps_frame["that"]

    # Series vs. another Series, then Series vs. itself.
    self.assert_eq(pser != other_pser, psser != other_psser)
    self.assert_eq(pser != pser, psser != psser)

    # SPARK-54665: boolean vs string comparison should match pandas behavior
    for text in ("True", "False"):
        self.assert_eq(pser != text, psser != text)

def test_lt(self):
pdf, psdf = self.bool_pdf, self.bool_psdf
Expand Down