Skip to content

Commit d16e7e0

Browse files
committed
feat(pandas): add problem 196
1 parent a8c0aa0 commit d16e7e0

File tree

3 files changed

+66
-1
lines changed

3 files changed

+66
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ Fiddling around with DataFusion, pandas, and PyArrow.
6868
|-------------:|:-------------------------------------------------------------------------------------------------------------|:-------------|:-------------|:---------|:----------|
6969
| 1667 | [Fix Names in a Table](https://leetcode.com/problems/fix-names-in-a-table) | Easy ||||
7070
| 1527 | [Patients With a Condition](https://leetcode.com/problems/patients-with-a-condition) | Easy ||||
71-
| 196 | [Delete Duplicate Emails](https://leetcode.com/problems/delete-duplicate-emails) | Easy || ||
71+
| 196 | [Delete Duplicate Emails](https://leetcode.com/problems/delete-duplicate-emails) | Easy || ||
7272
| 176 | [Second Highest Salary](https://leetcode.com/problems/second-highest-salary) | Medium ||||
7373
| 1484 | [Group Sold Products By The Date](https://leetcode.com/problems/group-sold-products-by-the-date) | Easy ||||
7474
| 1327 | [List the Products Ordered in a Period](https://leetcode.com/problems/list-the-products-ordered-in-a-period) | Easy ||||

problems/pandas.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,28 @@ def problem_180(logs: pd.DataFrame) -> pd.DataFrame:
6363
)
6464

6565

66+
def problem_196(person: pd.DataFrame) -> pd.DataFrame:
    """Remove rows with repeated emails, retaining the lowest-id row for each.

    Rows are ordered by ``id`` (then ``email``) ascending, so the first
    occurrence of every email is the one with the smallest id; all later
    occurrences are discarded.

    The final order of the Person table does not matter.

    Parameters
    ----------
    person : pd.DataFrame
        A table containing email addresses. Must provide ``id`` and ``email``
        columns.

    Returns
    -------
    pd.DataFrame
        A new frame with one row per distinct email, ordered by ``id``.
        Original index labels are retained; the input frame is not modified.

    """
    ordered = person.sort_values(by=["id", "email"])
    # After sorting, any row whose email was already seen has a larger id.
    is_repeat = ordered.duplicated(subset=["email"], keep="first")
    return ordered[~is_repeat]
86+
87+
6688
def problem_197(weather: pd.DataFrame) -> pd.DataFrame:
6789
"""Find IDs of dates with higher temperatures than the previous day.
6890

tests/test_pandas.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,49 @@ def test_problem_180(input_data, expected_data):
9494
)
9595

9696

97+
@pytest.mark.parametrize(
    "input_data, expected_data",
    [
        pytest.param(
            {
                "id": [1, 2, 3],
                "email": ["a@example.com", "b@example.com", "c@example.com"],
            },
            {
                "id": [1, 2, 3],
                "email": ["a@example.com", "b@example.com", "c@example.com"],
            },
            id="unique_emails",
        ),
        pytest.param(
            {
                "id": [1, 2, 3, 4],
                "email": [
                    "a@example.com",
                    "b@example.com",
                    "a@example.com",
                    "b@example.com",
                ],
            },
            {"id": [1, 2], "email": ["a@example.com", "b@example.com"]},
            id="duplicate_emails",
        ),
        pytest.param(
            {"id": [1], "email": ["a@example.com"]},
            {"id": [1], "email": ["a@example.com"]},
            id="single_row",
        ),
    ],
)
def test_problem_196(input_data, expected_data):
    """Check that problem_196 keeps only the lowest-id row for each email."""
    # The solution keeps the original index labels, so renumber before comparing.
    actual = problem_196(pd.DataFrame(input_data)).reset_index(drop=True)
    wanted = pd.DataFrame(expected_data)
    assert_frame_equal(
        actual,
        wanted,
        check_dtype=False,
        check_index_type=False,
    )
138+
139+
97140
@pytest.mark.parametrize(
98141
"input_data, expected_data",
99142
[

0 commit comments

Comments
 (0)