feat(pandas): add problem 1321

IndexSeek · IndexSeek · commit 715b9ec4bc88 · 2024-12-13T00:20:59.000Z
diff --git a/README.md b/README.md
@@ -58,7 +58,7 @@ Fiddling around with DataFusion, pandas, and PyArrow.
 - [X] [] [] 1978. [Employees Whose Manager Left the Company](https://leetcode.com/problems/employees-whose-manager-left-the-company) - Easy
 - [X] [] [] 626. [Exchange Seats](https://leetcode.com/problems/exchange-seats) - Medium
 - [X] [] [] 1341. [Movie Rating](https://leetcode.com/problems/movie-rating) - Medium
-- [] [X] [] 1321. [Restaurant Growth](https://leetcode.com/problems/restaurant-growth) - Medium
+- [] [X] [X] 1321. [Restaurant Growth](https://leetcode.com/problems/restaurant-growth) - Medium
 - [X] [] [] 602. [Friend Requests II: Who Has the Most Friends](https://leetcode.com/problems/friend-requests-ii-who-has-the-most-friends) - Medium
 - [X] [] [] 585. [Investments in 2016](https://leetcode.com/problems/investments-in-2016) - Medium
 - [X] [] [] 185. [Department Top Three Salaries](https://leetcode.com/problems/department-top-three-salaries) - Hard
diff --git a/problems/pandas.py b/problems/pandas.py
@@ -29,3 +29,33 @@ def problem_176(employee: pd.DataFrame) -> pd.DataFrame:
     if result.empty:
         return pd.DataFrame([None], columns=["SecondHighestSalary"])
     return result
+
+
+def problem_1321(customer: pd.DataFrame) -> pd.DataFrame:
+    """Compute the moving average of how much customers paid over a seven-day window.
+
+    You are the restaurant owner and you want to analyze a possible expansion (there
+    will be at least one customer every day). The seven-day window is the current day
+    plus the 6 days before it. `average_amount` is rounded to two decimal places.
+
+    Return the result table ordered by visited_on in ascending order.
+
+    Parameters
+    ----------
+    customer : pd.DataFrame
+        Amount paid per customer visit; `visited_on` and `amount` columns are used.
+
+    Returns
+    -------
+    pd.DataFrame
+        Columns `visited_on`, `amount` (seven-day total) and `average_amount`.
+    """
+    grouped = customer.groupby(["visited_on"]).aggregate(
+        amount=pd.NamedAgg("amount", "sum")
+    )
+    grouped = (
+        grouped.assign(amount=grouped["amount"].rolling("7D").sum())
+        .reset_index()
+        .loc[6:]
+    )
+    return grouped.assign(average_amount=(grouped["amount"] / 7).round(2))
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -1,7 +1,9 @@
+from datetime import datetime
+
 import pandas as pd
 import pytest
 
-from problems.pandas import problem_176
+from problems.pandas import problem_176, problem_1321
 
 
 @pytest.mark.parametrize(
@@ -33,3 +35,110 @@ def test_problem_176(input_data, expected_data):
     expected_table = pd.DataFrame(expected_data)
     result = problem_176(table)
     assert result.equals(expected_table)
+
+
+@pytest.mark.parametrize(
+    "input_data, expected_data",
+    [
+        pytest.param(
+            {
+                "customer_id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 3],
+                "name": [
+                    "Jhon",
+                    "Daniel",
+                    "Jade",
+                    "Khaled",
+                    "Winston",
+                    "Elvis",
+                    "Anna",
+                    "Maria",
+                    "Jaze",
+                    "Jhon",
+                    "Jade",
+                ],
+                "visited_on": [
+                    datetime(2019, 1, 1),
+                    datetime(2019, 1, 2),
+                    datetime(2019, 1, 3),
+                    datetime(2019, 1, 4),
+                    datetime(2019, 1, 5),
+                    datetime(2019, 1, 6),
+                    datetime(2019, 1, 7),
+                    datetime(2019, 1, 8),
+                    datetime(2019, 1, 9),
+                    datetime(2019, 1, 10),
+                    datetime(2019, 1, 10),
+                ],
+                "amount": [100, 110, 120, 130, 110, 140, 150, 80, 110, 130, 150],
+            },
+            {
+                "visited_on": [
+                    datetime(2019, 1, 7),
+                    datetime(2019, 1, 8),
+                    datetime(2019, 1, 9),
+                    datetime(2019, 1, 10),
+                ],
+                "amount": [860, 840, 840, 1000],
+                "average_amount": [122.86, 120, 120, 142.86],
+            },
+            id="happy_path",
+        ),
+        pytest.param(
+            {
+                "customer_id": [1, 2, 3, 1, 4, 5, 6, 1, 7, 8, 9],
+                "name": [
+                    "Jhon",
+                    "Daniel",
+                    "Jade",
+                    "Jhon",
+                    "Khaled",
+                    "Winston",
+                    "Elvis",
+                    "Jhon",
+                    "Anna",
+                    "Maria",
+                    "Jaze",
+                ],
+                "visited_on": [
+                    datetime(2019, 1, 1),
+                    datetime(2019, 1, 2),
+                    datetime(2019, 1, 3),
+                    datetime(2019, 1, 1),
+                    datetime(2019, 1, 4),
+                    datetime(2019, 1, 5),
+                    datetime(2019, 1, 6),
+                    datetime(2019, 1, 1),
+                    datetime(2019, 1, 7),
+                    datetime(2019, 1, 8),
+                    datetime(2019, 1, 9),
+                ],
+                "amount": [100, 110, 120, 50, 130, 110, 140, 40, 150, 80, 110],
+            },
+            {
+                "visited_on": [
+                    datetime(2019, 1, 7),
+                    datetime(2019, 1, 8),
+                    datetime(2019, 1, 9),
+                ],
+                "amount": [950, 840, 840],
+                "average_amount": [135.71, 120, 120],
+            },
+            id="duplicated_days",
+        ),
+    ],
+)
+def test_problem_1321(input_data, expected_data):
+    table = pd.DataFrame(input_data)
+    expected_table = pd.DataFrame(expected_data).reset_index(drop=True)
+    result = (
+        problem_1321(table)
+        .reset_index(drop=True)
+        .astype(expected_table.dtypes.to_dict())
+    )
+    assert list(result.index) == list(
+        expected_table.index
+    ), f"Index mismatch: {result.index} vs {expected_table.index}"
+    for col in expected_table.columns:
+        assert result[col].equals(expected_table[col]), f"Mismatch in column '{col}'"
+
+    assert result.equals(expected_table)