Skip to content

Commit 1ad3d5d

Browse files
chore(python): Add additional unit tests for pl.concat (#24487)
Co-authored-by: Gijs Burghoorn <me@gburghoorn.com>
1 parent e2d9e26 commit 1ad3d5d

File tree

2 files changed

+268
-109
lines changed

2 files changed

+268
-109
lines changed

py-polars/tests/unit/functions/test_concat.py

Lines changed: 268 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
import io
2+
from typing import IO
3+
14
import pytest
25

36
import polars as pl
@@ -96,3 +99,268 @@ def test_concat_zip_series_21980() -> None:
9699
df = pl.DataFrame({"x": 1, "y": 2})
97100
out = df.select(pl.concat([pl.col.x, pl.col.y]), pl.Series([3, 4]))
98101
assert_frame_equal(out, pl.DataFrame({"x": [1, 2], "": [3, 4]}))
102+
103+
104+
def test_concat_invalid_schema_err_20355() -> None:
    """Streaming collect of a concat over mismatched lazy schemas must raise."""
    frames = [
        pl.LazyFrame({"x": [1], "y": [None]}),
        pl.LazyFrame({"y": [1]}),
    ]
    with pytest.raises(pl.exceptions.InvalidOperationError):
        pl.concat(frames).collect(engine="streaming")
109+
110+
111+
def test_concat_df() -> None:
    """Exercise vertical DataFrame concat: shape, chunks, generators, errors."""
    df1 = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
    df2 = pl.concat([df1, df1], rechunk=True)

    assert df2.shape == (6, 3)
    assert df2.n_chunks() == 1
    assert df2.rows() == df1.rows() + df1.rows()
    assert pl.concat([df1, df1], rechunk=False).n_chunks() == 2

    # concat from generator of frames
    df3 = pl.concat(items=(df1 for _ in range(2)))
    assert_frame_equal(df2, df3)

    # check that df4 is not modified following concat of itself
    df4 = pl.from_records(((1, 2), (1, 2)))
    _ = pl.concat([df4, df4, df4])

    assert df4.shape == (2, 2)
    assert df4.rows() == [(1, 1), (2, 2)]

    # misc error conditions
    with pytest.raises(ValueError):
        _ = pl.concat([])

    # pin the message so that an unrelated ValueError cannot satisfy the check
    with pytest.raises(ValueError, match="DataFrame `how` must be one of"):
        pl.concat([df1, df1], how="rubbish")  # type: ignore[arg-type]
137+
138+
139+
def test_concat_to_empty() -> None:
    """Concatenating a column-less frame with a populated one keeps the data."""
    combined = pl.concat([pl.DataFrame([]), pl.DataFrame({"a": [1]})])
    assert combined.to_dict(as_series=False) == {"a": [1]}
143+
144+
145+
def test_concat_multiple_parquet_inmem() -> None:
    """Reading multiple in-memory parquet sources equals concat of the frames."""
    first = pl.DataFrame(
        {
            "a": [1, 2, 3],
            "b": ["xyz", "abc", "wow"],
        }
    )
    second = pl.DataFrame(
        {
            "a": [5, 6, 7],
            "b": ["a", "few", "entries"],
        }
    )
    expected = pl.concat([first, second])

    buf_first = io.BytesIO()
    buf_second = io.BytesIO()
    first.write_parquet(buf_first)
    second.write_parquet(buf_second)

    def rewind() -> None:
        # every read below has to start from the beginning of both buffers
        buf_first.seek(0)
        buf_second.seek(0)

    sources: list[IO[bytes]] = [buf_first, buf_second]

    # file-like inputs, native reader
    rewind()
    assert_frame_equal(pl.read_parquet(sources), expected)

    # file-like inputs, pyarrow reader
    rewind()
    assert_frame_equal(pl.read_parquet(sources, use_pyarrow=True), expected)

    # raw bytes inputs, both readers
    rewind()
    raw_first = buf_first.read()
    raw_second = buf_second.read()

    assert_frame_equal(pl.read_parquet([raw_first, raw_second]), expected)
    assert_frame_equal(
        pl.read_parquet([raw_first, raw_second], use_pyarrow=True), expected
    )
186+
187+
188+
def test_concat_series() -> None:
    """Concatenating a Series with itself doubles the length, leaving it intact."""
    series = pl.Series("a", [2, 1, 3])
    doubled = pl.concat([series, series])

    assert doubled.len() == 6
    # the input must keep its original length after the concat
    assert series.len() == 3
194+
195+
196+
def test_concat_null_20501() -> None:
    """Lazy concat where the second frame's column holds only a null."""
    left = pl.DataFrame({"id": [1], "value": ["foo"]}).lazy()
    right = pl.DataFrame({"id": [2], "value": [None]}).lazy()

    combined = pl.concat([left, right]).collect()

    assert combined.to_dict(as_series=False) == {
        "id": [1, 2],
        "value": ["foo", None],
    }
204+
205+
206+
def test_concat_single_element() -> None:
    """A single-element concat hands back the very same object."""
    frame = pl.DataFrame({"a": [1, 2, 3]})
    assert pl.concat([frame]) is frame

    series = pl.Series("test", [1, 2, 3])
    assert pl.concat([series]) is series
214+
215+
216+
def test_concat_diagonal() -> None:
    """Diagonal concat unions the columns and null-fills the missing cells."""
    frames = [
        pl.DataFrame({"a": [1, 2], "b": [3, 4]}),
        pl.DataFrame({"a": [5, 6], "c": [7, 8]}),
        pl.DataFrame({"b": [9, 10], "c": [11, 12]}),
    ]
    expected = pl.DataFrame(
        {
            "a": [1, 2, 5, 6, None, None],
            "b": [3, 4, None, None, 9, 10],
            "c": [None, None, 7, 8, 11, 12],
        }
    )
    assert_frame_equal(pl.concat(frames, how="diagonal"), expected)
230+
231+
232+
def test_concat_diagonal_relaxed() -> None:
    """Relaxed diagonal concat reconciles differing column dtypes."""
    frames = [
        pl.DataFrame(
            {"a": [1, 2], "c": [10, 20]},
            schema={"a": pl.Int32, "c": pl.Int64},
        ),
        pl.DataFrame(
            {"a": [3.5, 4.5], "b": [30.1, 40.2]},
            schema={"a": pl.Float64, "b": pl.Float32},
        ),
        pl.DataFrame({"b": [5, 6], "c": [50, 60]}),
    ]

    result = pl.concat(frames, how="diagonal_relaxed")

    # dtype each output column is expected to end up with
    expected_dtypes = {"a": pl.Float64, "b": pl.Float64, "c": pl.Int64}
    for column, dtype in expected_dtypes.items():
        assert result.schema[column] == dtype

    expected = pl.DataFrame(
        {
            "a": [1.0, 2.0, 3.5, 4.5, None, None],
            "c": [10, 20, None, None, 50, 60],
            "b": [None, None, 30.1, 40.2, 5.0, 6.0],
        }
    )
    assert_frame_equal(result, expected)
256+
257+
258+
def test_concat_horizontal() -> None:
    """Horizontal concat of unequal-height frames null-pads the shorter ones."""
    frames = [
        pl.DataFrame({"a": [1, 2, 3]}),
        pl.DataFrame({"b": [4, 5]}),
        pl.DataFrame({"c": [6, 7, 8, 9]}),
    ]
    expected = pl.DataFrame(
        {
            "a": [1, 2, 3, None],
            "b": [4, 5, None, None],
            "c": [6, 7, 8, 9],
        }
    )
    stacked = pl.concat(frames, how="horizontal")
    assert_frame_equal(stacked, expected)
268+
269+
270+
def test_concat_align_no_common_columns() -> None:
    """Align concat raises when the frames share no column."""
    left = pl.DataFrame({"a": [1, 2]})
    right = pl.DataFrame({"b": [3, 4]})
    expected_message = "requires at least one common column"

    with pytest.raises(pl.exceptions.InvalidOperationError, match=expected_message):
        pl.concat([left, right], how="align")
278+
279+
280+
def test_concat_align_lazy_frames() -> None:
    """Align concat of LazyFrames stays lazy and collects to the aligned frame."""
    left = pl.DataFrame({"id": [1, 2], "x": [3, 4]}).lazy()
    right = pl.DataFrame({"id": [2, 3], "y": [5, 6]}).lazy()

    aligned = pl.concat([left, right], how="align")
    assert isinstance(aligned, pl.LazyFrame)

    expected = pl.DataFrame({"id": [1, 2, 3], "x": [3, 4, None], "y": [None, 5, 6]})
    # row order is deliberately not part of the comparison
    assert_frame_equal(aligned.collect(), expected, check_row_order=False)
290+
291+
292+
def test_concat_lazyframe_horizontal() -> None:
    """Horizontal concat of LazyFrames stays lazy and null-pads the shorter one."""
    short = pl.DataFrame({"a": [1, 2]}).lazy()
    tall = pl.DataFrame({"b": [3, 4, 5]}).lazy()

    stacked = pl.concat([short, tall], how="horizontal")
    assert isinstance(stacked, pl.LazyFrame)

    expected = pl.DataFrame({"a": [1, 2, None], "b": [3, 4, 5]})
    assert_frame_equal(stacked.collect(), expected)
302+
303+
304+
def test_concat_lazyframe_diagonal() -> None:
    """Diagonal concat of LazyFrames stays lazy and null-fills missing columns."""
    left = pl.DataFrame({"a": [1, 2], "b": [3, 4]}).lazy()
    right = pl.DataFrame({"a": [5, 6], "c": [7, 8]}).lazy()

    merged = pl.concat([left, right], how="diagonal")
    assert isinstance(merged, pl.LazyFrame)

    expected = pl.DataFrame(
        {
            "a": [1, 2, 5, 6],
            "b": [3, 4, None, None],
            "c": [None, None, 7, 8],
        }
    )
    assert_frame_equal(merged.collect(), expected)
316+
317+
318+
def test_concat_series_invalid_strategy() -> None:
    """Series concat rejects the horizontal and diagonal strategies."""
    first = pl.Series("a", [1, 2, 3])
    second = pl.Series("b", [4, 5, 6])
    expected_message = "Series only supports 'vertical' concat strategy"

    with pytest.raises(ValueError, match=expected_message):
        pl.concat([first, second], how="horizontal")

    with pytest.raises(ValueError, match=expected_message):
        pl.concat([first, second], how="diagonal")
331+
332+
333+
def test_concat_invalid_how_parameter() -> None:
    """An unknown `how` value for DataFrames raises a ValueError."""
    frames = [pl.DataFrame({"a": [1, 2]}), pl.DataFrame({"a": [3, 4]})]

    with pytest.raises(ValueError, match="DataFrame `how` must be one of"):
        pl.concat(frames, how="invalid_strategy")  # type: ignore[arg-type]
339+
340+
341+
def test_concat_unsupported_type() -> None:
    """Concatenating plain integers raises a TypeError."""
    expected_message = "did not expect type"

    with pytest.raises(TypeError, match=expected_message):
        pl.concat([1, 2, 3])  # type: ignore[type-var]
344+
345+
346+
def test_concat_expressions() -> None:
    """Concatenating two column expressions stacks their values in order."""
    source = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    stacked = pl.concat([pl.col("a"), pl.col("b")]).alias("concatenated")

    result = source.select(stacked)

    assert_frame_equal(result, pl.DataFrame({"concatenated": [1, 2, 3, 4]}))
356+
357+
358+
def test_concat_with_empty_dataframes() -> None:
    """A zero-row frame with a matching schema is a no-op on either side."""
    empty = pl.DataFrame(schema={"a": pl.Int64, "b": pl.String})
    populated = pl.DataFrame({"a": [1, 2], "b": ["x", "y"]})

    # the empty frame comes first
    assert_frame_equal(pl.concat([empty, populated]), populated)
    # the empty frame comes last
    assert_frame_equal(pl.concat([populated, empty]), populated)

py-polars/tests/unit/operations/test_concat.py

Lines changed: 0 additions & 109 deletions
This file was deleted.

0 commit comments

Comments
 (0)