-
Notifications
You must be signed in to change notification settings - Fork 50
feat: df.join lsuffix and rsuffix support #1857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
8e85a0b
515c985
481a6bb
e66a0a1
798d3d5
14a1c54
8c6630b
69fa715
9748b35
53ef0cc
8b09d10
4e80220
052e090
d661ea6
1ba81a4
014bb73
cd4d962
12464f2
6892d84
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2444,12 +2444,40 @@ def test_join_different_table( | |
assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) | ||
|
||
|
||
def test_join_duplicate_columns_raises_not_implemented(scalars_dfs): | ||
@all_joins | ||
def test_join_raise_when_param_on_duplicate_with_column(scalars_df_index, how): | ||
if how == "cross": | ||
return | ||
bf_df_a = scalars_df_index[["string_col", "int64_col"]].rename( | ||
columns={"int64_col": "string_col"} | ||
) | ||
bf_df_b = scalars_df_index.dropna()["string_col"] | ||
with pytest.raises( | ||
ValueError, match="The column label 'string_col' is not unique." | ||
): | ||
bf_df_a.join(bf_df_b, on="string_col", how=how, lsuffix="_l", rsuffix="_r") | ||
|
||
|
||
def test_join_duplicate_columns_raises_value_error(scalars_dfs): | ||
scalars_df, _ = scalars_dfs | ||
df_a = scalars_df[["string_col", "float64_col"]] | ||
df_b = scalars_df[["float64_col"]] | ||
with pytest.raises(NotImplementedError): | ||
df_a.join(df_b, how="outer").to_pandas() | ||
with pytest.raises(ValueError, match="columns overlap but no suffix specified"): | ||
df_a.join(df_b, how="outer") | ||
|
||
|
||
@all_joins | ||
def test_join_param_on_duplicate_with_index_raises_value_error(scalars_df_index, how): | ||
if how == "cross": | ||
return | ||
Comment on lines
+2471
to
+2472
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it'd be worth adding a test that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Cross join actually raises a different error; match added. |
||
bf_df_a = scalars_df_index[["string_col"]] | ||
bf_df_a.index.name = "string_col" | ||
bf_df_b = scalars_df_index.dropna()["string_col"] | ||
with pytest.raises( | ||
ValueError, | ||
match="'string_col' is both an index level and a column label, which is ambiguous.", | ||
): | ||
bf_df_a.join(bf_df_b, on="string_col", how=how, lsuffix="_l", rsuffix="_r") | ||
|
||
|
||
@all_joins | ||
|
@@ -2461,7 +2489,7 @@ def test_join_param_on(scalars_dfs, how): | |
bf_df_b = bf_df[["float64_col"]] | ||
|
||
if how == "cross": | ||
with pytest.raises(ValueError): | ||
with pytest.raises(ValueError, match="'on' is not supported for cross join."): | ||
bf_df_a.join(bf_df_b, on="rowindex_2", how=how) | ||
else: | ||
bf_result = bf_df_a.join(bf_df_b, on="rowindex_2", how=how).to_pandas() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: This `if` block is getting pretty long. Might be time for a helper function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a `_join_on_key` helper function.