diff --git a/crates/polars-core/src/chunked_array/ops/full.rs b/crates/polars-core/src/chunked_array/ops/full.rs
index ee307cc3ca8e..e33d38118891 100644
--- a/crates/polars-core/src/chunked_array/ops/full.rs
+++ b/crates/polars-core/src/chunked_array/ops/full.rs
@@ -128,14 +128,21 @@ impl ArrayChunked {
             ArrowDataType::FixedSizeList(
                 Box::new(ArrowField::new(
                     PlSmallStr::from_static("item"),
-                    inner_dtype.to_arrow(CompatLevel::newest()),
+                    inner_dtype.to_physical().to_arrow(CompatLevel::newest()),
                     true,
                 )),
                 width,
             ),
             length,
         );
-        ChunkedArray::with_chunk(name, arr)
+        // SAFETY: `arr` uses the physical type of `inner_dtype`, the logical inner dtype below.
+        unsafe {
+            ChunkedArray::from_chunks_and_dtype(
+                name,
+                vec![Box::new(arr)],
+                DataType::Array(Box::new(inner_dtype.clone()), width),
+            )
+        }
     }
 }
 
@@ -147,14 +154,22 @@ impl ChunkFull<&Series> for ArrayChunked {
         let arrow_dtype = ArrowDataType::FixedSizeList(
             Box::new(ArrowField::new(
                 PlSmallStr::from_static("item"),
-                dtype.to_arrow(CompatLevel::newest()),
+                dtype.to_physical().to_arrow(CompatLevel::newest()),
                 true,
             )),
             width,
         );
         let value = value.rechunk().chunks()[0].clone();
         let arr = FixedSizeListArray::full(length, value, arrow_dtype);
-        ChunkedArray::with_chunk(name, arr)
+
+        // SAFETY: `arrow_dtype` uses the physical type of `dtype`, the logical inner dtype below.
+        unsafe {
+            ChunkedArray::from_chunks_and_dtype(
+                name,
+                vec![Box::new(arr)],
+                DataType::Array(Box::new(dtype.clone()), width),
+            )
+        }
     }
 }
 
diff --git a/py-polars/tests/unit/datatypes/test_array.py b/py-polars/tests/unit/datatypes/test_array.py
index 03f92bd68d11..6c4f240803bf 100644
--- a/py-polars/tests/unit/datatypes/test_array.py
+++ b/py-polars/tests/unit/datatypes/test_array.py
@@ -327,3 +327,18 @@ def test_array_inner_recursive_python_dtype() -> None:
 def test_array_missing_shape() -> None:
     with pytest.raises(TypeError):
         pl.Array(pl.Int8)
+
+
+def test_array_invalid_physical_type_18920() -> None:
+    s1 = pl.Series("x", [[1000, 2000]], pl.List(pl.Datetime))
+    s2 = pl.Series("x", [None], pl.List(pl.Datetime))
+
+    df1 = s1.to_frame().with_columns(pl.col.x.list.to_array(2))
+    df2 = s2.to_frame().with_columns(pl.col.x.list.to_array(2))
+
+    df = pl.concat([df1, df2])  # one populated row followed by one null row
+
+    expected_s = pl.Series("x", [[1000, 2000], None], pl.List(pl.Datetime))
+
+    expected = expected_s.to_frame().with_columns(pl.col.x.list.to_array(2))
+    assert_frame_equal(df, expected)