Skip to content

Commit

Permalink
fix: Properly choose inner physical type for Array (#18942)
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Sep 26, 2024
1 parent d85240d commit 503582e
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 4 deletions.
23 changes: 19 additions & 4 deletions crates/polars-core/src/chunked_array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,21 @@ impl ArrayChunked {
ArrowDataType::FixedSizeList(
Box::new(ArrowField::new(
PlSmallStr::from_static("item"),
inner_dtype.to_arrow(CompatLevel::newest()),
inner_dtype.to_physical().to_arrow(CompatLevel::newest()),
true,
)),
width,
),
length,
);
ChunkedArray::with_chunk(name, arr)
// SAFETY: physical type matches the logical.
unsafe {
ChunkedArray::from_chunks_and_dtype(
name,
vec![Box::new(arr)],
DataType::Array(Box::new(inner_dtype.clone()), width),
)
}
}
}

Expand All @@ -147,14 +154,22 @@ impl ChunkFull<&Series> for ArrayChunked {
let arrow_dtype = ArrowDataType::FixedSizeList(
Box::new(ArrowField::new(
PlSmallStr::from_static("item"),
dtype.to_arrow(CompatLevel::newest()),
dtype.to_physical().to_arrow(CompatLevel::newest()),
true,
)),
width,
);
let value = value.rechunk().chunks()[0].clone();
let arr = FixedSizeListArray::full(length, value, arrow_dtype);
ChunkedArray::with_chunk(name, arr)

// SAFETY: physical type matches the logical.
unsafe {
ChunkedArray::from_chunks_and_dtype(
name,
vec![Box::new(arr)],
DataType::Array(Box::new(dtype.clone()), width),
)
}
}
}

Expand Down
15 changes: 15 additions & 0 deletions py-polars/tests/unit/datatypes/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,18 @@ def test_array_inner_recursive_python_dtype() -> None:
def test_array_missing_shape() -> None:
    """Constructing ``pl.Array`` from an inner dtype alone must raise ``TypeError``."""
    inner_dtype = pl.Int8
    # No shape/width argument is supplied, which is the condition under test.
    with pytest.raises(TypeError):
        pl.Array(inner_dtype)


def test_array_invalid_physical_type_18920() -> None:
    """Check concatenation of width-2 Array columns with a Datetime inner dtype.

    One frame holds a single populated row and the other a single null row;
    concatenating them must equal the frame built from both rows at once.
    The number in the test name presumably refers to the originating issue.
    """

    def to_array_frame(rows: list) -> pl.DataFrame:
        # Build a List(Datetime) series named "x", then convert that column
        # into a fixed-width (2) Array column.
        series = pl.Series("x", rows, pl.List(pl.Datetime))
        return series.to_frame().with_columns(pl.col.x.list.to_array(2))

    populated = to_array_frame([[1000, 2000]])
    only_null = to_array_frame([None])

    combined = pl.concat([populated, only_null])

    assert_frame_equal(combined, to_array_frame([[1000, 2000], None]))

0 comments on commit 503582e

Please sign in to comment.