Skip to content

Commit d03c597

Browse files
committed
feat(datafusion): add problem 180
1 parent 9d960ed commit d03c597

File tree

4 files changed

+132
-14
lines changed

4 files changed

+132
-14
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ Fiddling around with DataFusion, pandas, and PyArrow.
5858
| 1731 | [The Number of Employees Which Report to Each Employee](https://leetcode.com/problems/the-number-of-employees-which-report-to-each-employee) | Easy ||||
5959
| 1789 | [Primary Department for Each Employee](https://leetcode.com/problems/primary-department-for-each-employee) | Easy ||||
6060
| 610 | [Triangle Judgement](https://leetcode.com/problems/triangle-judgement) | Easy ||||
61-
| 180 | [Consecutive Numbers](https://leetcode.com/problems/consecutive-numbers) | Medium || ||
61+
| 180 | [Consecutive Numbers](https://leetcode.com/problems/consecutive-numbers) | Medium || ||
6262
| 1164 | [Product Price at a Given Date](https://leetcode.com/problems/product-price-at-a-given-date) | Medium ||||
6363
| 1204 | [Last Person to Fit in the Bus](https://leetcode.com/problems/last-person-to-fit-in-the-bus) | Medium ||||
6464
| 1907 | [Count Salary Categories](https://leetcode.com/problems/count-salary-categories) | Medium ||||

problems/datafusion.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,64 @@ def problem_176(employee: pa.Table) -> datafusion.dataframe.DataFrame:
3838
return t
3939

4040

41+
def problem_180(logs: pa.Table) -> datafusion.dataframe.DataFrame:
    """Find all numbers that appear at least three times consecutively.

    Return the result table in any order.

    Parameters
    ----------
    logs : pa.Table
        A table containing sequential ids and numbers.

    Returns
    -------
    datafusion.dataframe.DataFrame
        A single-column ``ConsecutiveNums`` frame holding each qualifying
        number once, or exactly one null row when no number qualifies.

    Examples
    --------
    >>> import pyarrow as pa
    >>> from problems.datafusion import problem_180
    >>> from problems.datasets import load_problem_180
    >>> logs = pa.table(load_problem_180())
    >>> problem_180(logs)
    DataFrame()
    +-----------------+
    | ConsecutiveNums |
    +-----------------+
    | 1               |
    +-----------------+

    """
    ctx = datafusion.SessionContext()
    num = F.col("num")
    by_id = [F.col("id")]

    # Put the two preceding values (in id order) next to every row.
    lagged = ctx.from_arrow(logs).select(
        num,
        F.lag(num, order_by=by_id).alias("num_lag_1"),
        F.lag(num, 2, order_by=by_id).alias("num_lag_2"),
    )

    # A row closes a run of three when it equals both of its predecessors.
    # De-duplicate directly on the DataFrame rather than registering a
    # temporary table and going through a SQL string.
    distinct_nums = (
        lagged.filter((num == F.col("num_lag_1")) & (num == F.col("num_lag_2")))
        .select("num")
        .with_column_renamed("num", "ConsecutiveNums")
        .distinct()
    )

    # Execute the plan exactly once; the emptiness check and the returned
    # frame both reuse this materialised table.
    materialised = distinct_nums.to_arrow_table()
    if materialised.num_rows == 0:
        # No run of three found: return a single null row as the placeholder.
        placeholder = pa.table(
            {"ConsecutiveNums": [None]},
            schema=pa.schema([pa.field("ConsecutiveNums", pa.int64())]),
        )
        return ctx.from_arrow(placeholder)
    return ctx.from_arrow(materialised)
97+
98+
4199
def problem_584(customer: pa.Table) -> datafusion.dataframe.DataFrame:
42100
"""Find names of customers not referred by the customer with ID = 2.
43101

problems/datasets.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Functions to load LeetCode problem datasets to pandas DataFrames."""
22

3+
from typing import Tuple
4+
35
import pandas as pd
46

57

@@ -17,7 +19,7 @@ def load_problem_180() -> pd.DataFrame:
1719
)
1820

1921

20-
def load_problem_185() -> tuple(pd.DataFrame, pd.DataFrame):
22+
def load_problem_185() -> Tuple[pd.DataFrame, pd.DataFrame]:
2123
data = [
2224
[1, "Joe", 85000, 1],
2325
[2, "Henry", 80000, 2],
@@ -92,7 +94,7 @@ def load_problem_570() -> pd.DataFrame:
9294
)
9395

9496

95-
def load_problem_577() -> tuple(pd.DataFrame, pd.DataFrame):
97+
def load_problem_577() -> Tuple[pd.DataFrame, pd.DataFrame]:
9698
data = [
9799
[3, "Brad", None, 4000],
98100
[1, "John", 3, 1000],
@@ -233,7 +235,7 @@ def load_problem_626() -> pd.DataFrame:
233235
)
234236

235237

236-
def load_problem_1045() -> tuple(pd.DataFrame, pd.DataFrame):
238+
def load_problem_1045() -> Tuple[pd.DataFrame, pd.DataFrame]:
237239
data = [[1, 5], [2, 6], [3, 5], [3, 6], [1, 6]]
238240
customer = pd.DataFrame(data, columns=["customer_id", "product_key"]).astype(
239241
{"customer_id": "Int64", "product_key": "Int64"}
@@ -269,7 +271,7 @@ def load_problem_1068() -> pd.DataFrame:
269271
return sales, product
270272

271273

272-
def load_problem_1070() -> tuple(pd.DataFrame, pd.DataFrame):
274+
def load_problem_1070() -> Tuple[pd.DataFrame, pd.DataFrame]:
273275
data = [
274276
[1, 100, 2008, 10, 5000],
275277
[2, 100, 2009, 12, 5000],
@@ -293,7 +295,7 @@ def load_problem_1070() -> tuple(pd.DataFrame, pd.DataFrame):
293295
return sales, product
294296

295297

296-
def load_problem_1075() -> tuple(pd.DataFrame, pd.DataFrame):
298+
def load_problem_1075() -> Tuple[pd.DataFrame, pd.DataFrame]:
297299
data = [[1, 1], [1, 2], [1, 3], [2, 1], [2, 4]]
298300
project = pd.DataFrame(data, columns=["project_id", "employee_id"]).astype(
299301
{"project_id": "Int64", "employee_id": "Int64"}
@@ -459,7 +461,7 @@ def load_problem_1211() -> pd.DataFrame:
459461
)
460462

461463

462-
def load_problem_1251() -> tuple(pd.DataFrame, pd.DataFrame):
464+
def load_problem_1251() -> Tuple[pd.DataFrame, pd.DataFrame]:
463465
data = [
464466
[1, "2019-02-17", "2019-02-28", 5],
465467
[1, "2019-03-01", "2019-03-22", 20],
@@ -490,7 +492,7 @@ def load_problem_1251() -> tuple(pd.DataFrame, pd.DataFrame):
490492
return prices, units_sold
491493

492494

493-
def load_problem_1280() -> tuple(pd.DataFrame, pd.DataFrame, pd.DataFrame):
495+
def load_problem_1280() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
494496
data = [[1, "Alice"], [2, "Bob"], [13, "John"], [6, "Alex"]]
495497
students = pd.DataFrame(data, columns=["student_id", "student_name"]).astype(
496498
{"student_id": "Int64", "student_name": "object"}
@@ -544,7 +546,7 @@ def load_problem_1321() -> pd.DataFrame:
544546
)
545547

546548

547-
def load_problem_1327() -> tuple(pd.DataFrame, pd.DataFrame):
549+
def load_problem_1327() -> Tuple[pd.DataFrame, pd.DataFrame]:
548550
data = [
549551
[1, "Leetcode Solutions", "Book"],
550552
[2, "Jewels of Stringology", "Book"],
@@ -577,7 +579,7 @@ def load_problem_1327() -> tuple(pd.DataFrame, pd.DataFrame):
577579
return products, orders
578580

579581

580-
def load_problem_1341() -> tuple(pd.DataFrame, pd.DataFrame, pd.DataFrame):
582+
def load_problem_1341() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
581583
data = [[1, "Avengers"], [2, "Frozen 2"], [3, "Joker"]]
582584
movies = pd.DataFrame(data, columns=["movie_id", "title"]).astype(
583585
{"movie_id": "Int64", "title": "object"}
@@ -610,7 +612,7 @@ def load_problem_1341() -> tuple(pd.DataFrame, pd.DataFrame, pd.DataFrame):
610612
return movies, users, movie_rating
611613

612614

613-
def load_problem_1378() -> tuple(pd.DataFrame, pd.DataFrame):
615+
def load_problem_1378() -> Tuple[pd.DataFrame, pd.DataFrame]:
614616
data = [[1, "Alice"], [7, "Bob"], [11, "Meir"], [90, "Winston"], [3, "Jonathan"]]
615617
employees = pd.DataFrame(data, columns=["id", "name"]).astype(
616618
{"id": "int64", "name": "object"}
@@ -665,7 +667,7 @@ def load_problem_1527() -> pd.DataFrame:
665667
).astype({"patient_id": "int64", "patient_name": "object", "conditions": "object"})
666668

667669

668-
def load_problem_1581() -> tuple(pd.DataFrame, pd.DataFrame):
670+
def load_problem_1581() -> Tuple[pd.DataFrame, pd.DataFrame]:
669671
data = [[1, 23], [2, 9], [4, 30], [5, 54], [6, 96], [7, 54], [8, 54]]
670672
visits = pd.DataFrame(data, columns=["visit_id", "customer_id"]).astype(
671673
{"visit_id": "Int64", "customer_id": "Int64"}
@@ -677,7 +679,7 @@ def load_problem_1581() -> tuple(pd.DataFrame, pd.DataFrame):
677679
return visits, transactions
678680

679681

680-
def load_problem_1633() -> tuple(pd.DataFrame, pd.DataFrame):
682+
def load_problem_1633() -> Tuple[pd.DataFrame, pd.DataFrame]:
681683
data = [[6, "Alice"], [2, "Bob"], [7, "Alex"]]
682684
users = pd.DataFrame(data, columns=["user_id", "user_name"]).astype(
683685
{"user_id": "Int64", "user_name": "object"}
@@ -806,7 +808,7 @@ def load_problem_1907() -> pd.DataFrame:
806808
)
807809

808810

809-
def load_problem_1934() -> tuple(pd.DataFrame, pd.DataFrame):
811+
def load_problem_1934() -> Tuple[pd.DataFrame, pd.DataFrame]:
810812
data = [
811813
[3, "2020-03-21 10:16:13"],
812814
[7, "2020-01-04 13:57:59"],

tests/test_datafusion.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from problems.datafusion import (
77
problem_176,
8+
problem_180,
89
problem_584,
910
problem_595,
1011
problem_620,
@@ -45,6 +46,63 @@ def test_problem_176(input_data, expected_data):
4546
assert result.to_arrow_table().equals(expected_table)
4647

4748

49+
@pytest.mark.parametrize(
    "input_data, expected_data",
    [
        pytest.param(
            {"id": [1, 2, 3, 4, 5, 6, 7, 8], "num": [1, 2, 3, 1, 1, 1, 4, 5]},
            {"ConsecutiveNums": [1]},
            id="one_consecutive_number_three_times",
        ),
        pytest.param(
            {"id": [1, 2, 3, 4, 5, 6, 7, 8], "num": [1, 2, 3, 1, 1, 1, 1, 5]},
            {"ConsecutiveNums": [1]},
            id="one_consecutive_number_four_times",
        ),
        pytest.param(
            {"id": [1, 2, 3, 4, 5], "num": [1, 2, 3, 4, 5]},
            {"ConsecutiveNums": [None]},
            id="no_consecutive_numbers",
        ),
        pytest.param(
            {"id": [], "num": []},
            {"ConsecutiveNums": [None]},
            id="empty_table",
        ),
    ],
)
def test_problem_180(input_data, expected_data):
    """Check problem_180 against runs of three, runs of four, and no runs."""
    # Pin both schemas to int64 so empty and all-null columns keep a type.
    logs_schema = pa.schema(
        [pa.field("id", pa.int64()), pa.field("num", pa.int64())]
    )
    answer_schema = pa.schema([pa.field("ConsecutiveNums", pa.int64())])

    logs = pa.Table.from_pydict(input_data, schema=logs_schema)
    expected = pa.Table.from_pydict(expected_data, schema=answer_schema)

    actual = problem_180(logs).to_arrow_table()
    assert actual.equals(expected)
104+
105+
48106
@pytest.mark.parametrize(
49107
"input_data, expected_data",
50108
[

0 commit comments

Comments
 (0)