feat(pandas): add problem 196

IndexSeek · IndexSeek · commit d16e7e0032ec · 2024-12-31T16:58:31.000Z
diff --git a/README.md b/README.md
@@ -68,7 +68,7 @@ Fiddling around with DataFusion, pandas, and PyArrow.
 |-------------:|:-------------------------------------------------------------------------------------------------------------|:-------------|:-------------|:---------|:----------|
 |         1667 | [Fix Names in a Table](https://leetcode.com/problems/fix-names-in-a-table)                                   | Easy         | ❌           | ❌       | ✅        |
 |         1527 | [Patients With a Condition](https://leetcode.com/problems/patients-with-a-condition)                         | Easy         | ❌           | ✅       | ✅        |
-|          196 | [Delete Duplicate Emails](https://leetcode.com/problems/delete-duplicate-emails)                             | Easy         | ❌           | ❌       | ✅        |
+|          196 | [Delete Duplicate Emails](https://leetcode.com/problems/delete-duplicate-emails)                             | Easy         | ❌           | ✅       | ✅        |
 |          176 | [Second Highest Salary](https://leetcode.com/problems/second-highest-salary)                                 | Medium       | ✅           | ✅       | ✅        |
 |         1484 | [Group Sold Products By The Date](https://leetcode.com/problems/group-sold-products-by-the-date)             | Easy         | ✅           | ✅       | ❌        |
 |         1327 | [List the Products Ordered in a Period](https://leetcode.com/problems/list-the-products-ordered-in-a-period) | Easy         | ❌           | ✅       | ✅        |
diff --git a/problems/pandas.py b/problems/pandas.py
@@ -63,6 +63,28 @@ def problem_180(logs: pd.DataFrame) -> pd.DataFrame:
     )
 
 
+def problem_196(person: pd.DataFrame) -> pd.DataFrame:
+    """Drop repeated emails, retaining the lowest-id row for each address.
+
+    Rows are ordered by ``id`` (ties broken by ``email``), after which only
+    the first occurrence of every email is kept, so the surviving row per
+    email is the one with the smallest id.
+
+    Parameters
+    ----------
+    person : pd.DataFrame
+        Table with ``id`` and ``email`` columns.
+
+    Returns
+    -------
+    pd.DataFrame
+        A new frame, sorted by ``id``, holding one row per distinct email.
+
+    """
+    ordered = person.sort_values(by=["id", "email"], ascending=True)
+    return ordered.drop_duplicates(subset="email", keep="first")
+
+
 def problem_197(weather: pd.DataFrame) -> pd.DataFrame:
     """Find IDs of dates with higher temperatures than the previous day.
 
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -94,6 +94,49 @@ def test_problem_180(input_data, expected_data):
     )
 
 
+@pytest.mark.parametrize(
+    "input_data, expected_data",
+    [
+        # Nothing repeats, so every row comes back unchanged.
+        pytest.param(
+            {
+                "id": [1, 2, 3],
+                "email": ["a@example.com", "b@example.com", "c@example.com"],
+            },
+            {
+                "id": [1, 2, 3],
+                "email": ["a@example.com", "b@example.com", "c@example.com"],
+            },
+            id="unique_emails",
+        ),
+        # Ids 3 and 4 repeat the emails of ids 1 and 2 and must be dropped.
+        pytest.param(
+            {
+                "id": [1, 2, 3, 4],
+                "email": ["a@example.com", "b@example.com"] * 2,
+            },
+            {"id": [1, 2], "email": ["a@example.com", "b@example.com"]},
+            id="duplicate_emails",
+        ),
+        # A lone row has nothing to collide with.
+        pytest.param(
+            {"id": [1], "email": ["a@example.com"]},
+            {"id": [1], "email": ["a@example.com"]},
+            id="single_row",
+        ),
+    ],
+)
+def test_problem_196(input_data, expected_data):
+    """Check that problem_196 keeps only the smallest-id row per email."""
+    expected = pd.DataFrame(expected_data)
+    actual = problem_196(pd.DataFrame(input_data))
+
+    # Renumber the index so it lines up with the freshly built expected frame.
+    actual = actual.reset_index(drop=True)
+
+    assert_frame_equal(actual, expected, check_dtype=False, check_index_type=False)
+
+
 @pytest.mark.parametrize(
     "input_data, expected_data",
     [