Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions sdc/datatypes/hpat_pandas_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,25 +1217,26 @@ def count_overload(df, axis=0, level=None, numeric_only=False):

def _dataframe_codegen_isna(func_name, columns, df):
"""
Example func_text for func_name='isna' columns=('float', 'int', 'string'):

Example of generated implementation
def _df_isna_impl(df):
series_float = pandas.Series(df._data[0])
result_float = series_float.isna()
series_int = pandas.Series(df._data[1])
result_int = series_int.isna()
series_string = pandas.Series(df._data[2])
result_string = series_string.isna()
return pandas.DataFrame({"float": result_float, "int": result_int, "string": result_string},
index = df._index)
data_0 = df._data[0][0]
series_0 = pandas.Series(data_0)
result_0 = series_0.isna()
data_1 = df._data[1][0]
series_1 = pandas.Series(data_1)
result_1 = series_1.isna()
return pandas.DataFrame({"A": result_0, "B": result_1}, index=df._index)
"""
results = []
func_lines = [f'def _df_{func_name}_impl(df):']
index = df_index_codegen_all(df)
for i, c in enumerate(columns):
result_c = f'result_{c}'
func_lines += [f' series_{c} = pandas.Series(df._data[{i}])',
f' {result_c} = series_{c}.{func_name}()']
col_loc = df.column_loc[c]
type_id, col_id = col_loc.type_id, col_loc.col_id
result_c = f'result_{i}'
func_lines += [f' data_{i} = df._data[{type_id}][{col_id}]',
f' series_{i} = pandas.Series(data_{i})',
f' {result_c} = series_{i}.{func_name}()']
results.append((columns[i], result_c))

data = ', '.join(f'"{col}": {data}' for col, data in results)
Expand Down
16 changes: 15 additions & 1 deletion sdc/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ def test_impl(df):
'D': [None, 'dd', '', None]})
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

@dfRefactoringNotImplemented
@dfRefactoringNotImplemented # required re-implementing DataFrame unboxing
def test_df_isna(self):
def test_impl(df):
return df.isna()
Expand All @@ -944,6 +944,20 @@ def test_impl(df):
with self.subTest(index=idx):
pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

@dfRefactoringNotImplemented  # required re-implementing DataFrame boxing
def test_df_isna_no_unboxing(self):
    """Compare DataFrame.isna() under sdc.jit against plain pandas.

    The frame is constructed inside the tested function itself, so no
    DataFrame argument is passed into the jitted code; only the result
    is returned from it.
    """
    def test_impl():
        # Mixed column kinds (float with NaN, int, bool, string with None)
        # and a non-default integer index.
        frame = pd.DataFrame({
            "A": [3.2, np.nan, 7.0, 3.3, np.nan],
            "B": [3, 4, 1, 0, 222],
            "C": [True, True, False, False, True],
            "D": ['a', 'dd', 'c', '12', None]
        }, index=[3, 4, 2, 6, 1])
        return frame.isna()

    sdc_func = sdc.jit(test_impl)
    # Evaluate the jitted variant first, then the pure-pandas reference.
    actual = sdc_func()
    expected = test_impl()
    pd.testing.assert_frame_equal(actual, expected)

@dfRefactoringNotImplemented
def test_df_bool(self):
def test_impl(df):
Expand Down