@@ -124,17 +124,27 @@ def null_df():
124124 ctx = SessionContext ()
125125
126126 # Create a RecordBatch with nulls across different types
127- batch = pa .RecordBatch .from_arrays ([
128- pa .array ([1 , None , 3 , None ], type = pa .int64 ()),
129- pa .array ([4.5 , 6.7 , None , None ], type = pa .float64 ()),
130- pa .array (["a" , None , "c" , None ], type = pa .string ()),
131- pa .array ([True , None , False , None ], type = pa .bool_ ()),
132- pa .array ([10957 , None , 18993 , None ],
133- type = pa .date32 ()), # 2000-01-01, null, 2022-01-01, null
134- pa .array ([946684800000 , None , 1640995200000 , None ],
135- type = pa .date64 ()), # 2000-01-01, null, 2022-01-01, null
136- ], names = ["int_col" , "float_col" , "str_col" , "bool_col" , "date32_col" ,
137- "date64_col" ]
127+ batch = pa .RecordBatch .from_arrays (
128+ [
129+ pa .array ([1 , None , 3 , None ], type = pa .int64 ()),
130+ pa .array ([4.5 , 6.7 , None , None ], type = pa .float64 ()),
131+ pa .array (["a" , None , "c" , None ], type = pa .string ()),
132+ pa .array ([True , None , False , None ], type = pa .bool_ ()),
133+ pa .array (
134+ [10957 , None , 18993 , None ], type = pa .date32 ()
135+ ), # 2000-01-01, null, 2022-01-01, null
136+ pa .array (
137+ [946684800000 , None , 1640995200000 , None ], type = pa .date64 ()
138+ ), # 2000-01-01, null, 2022-01-01, null
139+ ],
140+ names = [
141+ "int_col" ,
142+ "float_col" ,
143+ "str_col" ,
144+ "bool_col" ,
145+ "date32_col" ,
146+ "date64_col" ,
147+ ],
138148 )
139149
140150 return ctx .create_dataframe ([[batch ]])
@@ -1671,6 +1681,7 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
16711681 assert "<style>" in local_html_1
16721682 assert "<style>" in local_html_2
16731683
1684+
16741685def test_fill_null_basic (null_df ):
16751686 """Test basic fill_null functionality with a single value."""
16761687 # Fill all nulls with 0
@@ -1701,6 +1712,7 @@ def test_fill_null_subset(null_df):
17011712 assert None in result .column (2 ).to_pylist ()
17021713 assert None in result .column (3 ).to_pylist ()
17031714
1715+
17041716def test_fill_null_str_column (null_df ):
17051717 """Test filling nulls in string columns with different values."""
17061718 # Fill string nulls with a replacement string
@@ -1753,9 +1765,9 @@ def test_fill_null_date32_column(null_df):
17531765 # Check that date32 nulls were filled with epoch date
17541766 dates = result .column (4 ).to_pylist ()
17551767 assert dates [0 ] == datetime .date (2000 , 1 , 1 ) # Original value
1756- assert dates [1 ] == epoch_date # Filled value
1768+ assert dates [1 ] == epoch_date # Filled value
17571769 assert dates [2 ] == datetime .date (2022 , 1 , 1 ) # Original value
1758- assert dates [3 ] == epoch_date # Filled value
1770+ assert dates [3 ] == epoch_date # Filled value
17591771
17601772 # Other date column should be unchanged
17611773 assert None in result .column (5 ).to_pylist ()
@@ -1773,9 +1785,9 @@ def test_fill_null_date64_column(null_df):
17731785 # Check that date64 nulls were filled with epoch date
17741786 dates = result .column (5 ).to_pylist ()
17751787 assert dates [0 ] == datetime .date (2000 , 1 , 1 ) # Original value
1776- assert dates [1 ] == epoch_date # Filled value
1788+ assert dates [1 ] == epoch_date # Filled value
17771789 assert dates [2 ] == datetime .date (2022 , 1 , 1 ) # Original value
1778- assert dates [3 ] == epoch_date # Filled value
1790+ assert dates [3 ] == epoch_date # Filled value
17791791
17801792 # Other date column should be unchanged
17811793 assert None in result .column (4 ).to_pylist ()
@@ -1835,10 +1847,18 @@ def test_fill_null_specific_types(null_df):
18351847 assert result .column (1 ).to_pylist () == [4.5 , 6.7 , None , None ]
18361848 assert result .column (2 ).to_pylist () == ["a" , "missing" , "c" , "missing" ]
18371849 assert result .column (3 ).to_pylist () == [True , None , False , None ] # Bool nulls remain null
1838- assert result .column (4 ).to_pylist () == [datetime .date (2000 , 1 , 1 ),
1839- None , datetime .date (2022 , 1 , 1 ), None ]
1840- assert result .column (5 ).to_pylist () == [datetime .date (2000 , 1 , 1 ),
1841- None , datetime .date (2022 , 1 , 1 ), None ]
1850+ assert result .column (4 ).to_pylist () == [
1851+ datetime .date (2000 , 1 , 1 ),
1852+ None ,
1853+ datetime .date (2022 , 1 , 1 ),
1854+ None ,
1855+ ]
1856+ assert result .column (5 ).to_pylist () == [
1857+ datetime .date (2000 , 1 , 1 ),
1858+ None ,
1859+ datetime .date (2022 , 1 , 1 ),
1860+ None ,
1861+ ]
18421862
18431863
18441864def test_fill_null_immutability (null_df ):
@@ -1863,7 +1883,7 @@ def test_fill_null_empty_df(ctx):
18631883 # Create an empty DataFrame with schema
18641884 batch = pa .RecordBatch .from_arrays (
18651885 [pa .array ([], type = pa .int64 ()), pa .array ([], type = pa .string ())],
1866- names = ["a" , "b" ]
1886+ names = ["a" , "b" ],
18671887 )
18681888 empty_df = ctx .create_dataframe ([[batch ]])
18691889
@@ -1883,7 +1903,7 @@ def test_fill_null_all_null_column(ctx):
18831903 # Create DataFrame with a column of all nulls
18841904 batch = pa .RecordBatch .from_arrays (
18851905 [pa .array ([1 , 2 , 3 ]), pa .array ([None , None , None ], type = pa .string ())],
1886- names = ["a" , "b" ]
1906+ names = ["a" , "b" ],
18871907 )
18881908 all_null_df = ctx .create_dataframe ([[batch ]])
18891909
0 commit comments