Skip to content

Commit 7319f22

Browse files
committed
feat(pandas): add problem 1907
1 parent 87f1ed1 commit 7319f22

File tree

3 files changed

+78
-1
lines changed

3 files changed

+78
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Fiddling around with DataFusion, pandas, and PyArrow.
5252
| 180 | [Consecutive Numbers](https://leetcode.com/problems/consecutive-numbers) | Medium ||||
5353
| 1164 | [Product Price at a Given Date](https://leetcode.com/problems/product-price-at-a-given-date) | Medium ||||
5454
| 1204 | [Last Person to Fit in the Bus](https://leetcode.com/problems/last-person-to-fit-in-the-bus) | Medium ||||
55-
| 1907 | [Count Salary Categories](https://leetcode.com/problems/count-salary-categories) | Medium || ||
55+
| 1907 | [Count Salary Categories](https://leetcode.com/problems/count-salary-categories) | Medium || ||
5656
## Subqueries
5757
| problem_id | title | difficulty | DataFusion | pandas | PyArrow |
5858
|-------------:|:--------------------------------------------------------------------------------------------------------------------------|:-------------|:-------------|:---------|:----------|

problems/pandas.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,6 +1258,53 @@ def problem_1789(employee: pd.DataFrame) -> pd.DataFrame:
12581258
]
12591259

12601260

1261+
def problem_1907(accounts: pd.DataFrame) -> pd.DataFrame:
    """Calculate the number of bank accounts for each salary category.

    The salary categories are:

    - "Low Salary": All the salaries strictly less than $20000.
    - "Average Salary": All the salaries in the inclusive range [$20000, $50000].
    - "High Salary": All the salaries strictly greater than $50000.

    The result table must contain all three categories. If there are no accounts in a category, return 0.

    Return the result table in any order.

    Parameters
    ----------
    accounts : pd.DataFrame
        A table containing the account data. Only the ``account_id`` and
        ``income`` columns are read; the frame itself is left unmodified.

    Returns
    -------
    pd.DataFrame
        One row per salary category with the columns ``category`` and
        ``accounts_count``. ``accounts_count`` always holds integers, including
        the ``0`` reported for categories without any account.

    """
    income = accounts["income"]
    # The three masks are mutually exclusive; a missing income compares False
    # everywhere and is therefore not counted in any category.
    category_masks = {
        "Low Salary": income < 20_000,
        "Average Salary": income.between(20_000, 50_000, inclusive="both"),
        "High Salary": income > 50_000,
    }
    # Counting per mask (instead of groupby + left merge + fillna) avoids adding
    # a helper column to the caller's frame and keeps the counts integral.
    return pd.DataFrame(
        {
            "category": list(category_masks),
            "accounts_count": [
                # ``count`` skips null account ids, like the "count" aggregation.
                int(accounts.loc[mask, "account_id"].count())
                for mask in category_masks.values()
            ],
        }
    )
1306+
1307+
12611308
def problem_1934(signups: pd.DataFrame, confirmations: pd.DataFrame) -> pd.DataFrame:
12621309
"""Find the confirmation rate of each user.
12631310

tests/test_pandas.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2293,6 +2293,36 @@ def test_problem_1789(input_data, expected_data):
22932293
)
22942294

22952295

2296+
@pytest.mark.parametrize(
    "input_data, expected_data",
    [
        pytest.param(
            {"account_id": [1, 2, 3], "income": [10000, 25000, 100000]},
            {
                "category": ["Low Salary", "Average Salary", "High Salary"],
                "accounts_count": [1, 1, 1],
            },
            id="all_three_salary_categories",
        ),
        pytest.param(
            {"account_id": [1, 2], "income": [10000, 25000]},
            {
                "category": ["Low Salary", "Average Salary", "High Salary"],
                "accounts_count": [1, 1, 0],
            },
            id="missing_one_salary_category",
        ),
        # 20000 and 50000 belong to "Average Salary" (inclusive range), while
        # their immediate neighbours fall into the strict low/high categories.
        pytest.param(
            {
                "account_id": [1, 2, 3, 4],
                "income": [19999, 20000, 50000, 50001],
            },
            {
                "category": ["Low Salary", "Average Salary", "High Salary"],
                "accounts_count": [1, 2, 1],
            },
            id="inclusive_average_salary_bounds",
        ),
    ],
)
def test_problem_1907(input_data, expected_data):
    """Check the per-category account counts returned by ``problem_1907``."""
    table = pd.DataFrame(input_data)
    expected_table = pd.DataFrame(expected_data)
    result = problem_1907(table).reset_index(drop=True)
    # Dtypes are deliberately not compared: only the category labels and the
    # count values are part of the expected result.
    assert_frame_equal(
        result, expected_table, check_dtype=False, check_index_type=False
    )
2324+
2325+
22962326
@pytest.mark.parametrize(
22972327
"input_data_1, input_data_2, expected_data",
22982328
[

0 commit comments

Comments
 (0)