Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: utility functions for creating FixedSizeList and LargeList dtypes #5373

Merged
merged 1 commit into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,7 @@ mod tests {
let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]);

// Construct a list array from the above two
let list_data_type =
DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::new_large_list(DataType::Int32, false);
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
.add_buffer(value_offsets.clone())
Expand Down Expand Up @@ -863,8 +862,7 @@ mod tests {
bit_util::set_bit(&mut null_bits, 8);

// Construct a list array from the above two
let list_data_type =
DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::new_large_list(DataType::Int32, false);
let list_data = ArrayData::builder(list_data_type)
.len(9)
.add_buffer(value_offsets)
Expand Down Expand Up @@ -929,8 +927,7 @@ mod tests {
bit_util::set_bit(&mut null_bits, 8);

// Construct a list array from the above two
let list_data_type =
DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, false)));
let list_data_type = DataType::new_large_list(DataType::Int32, false);
let list_data = ArrayData::builder(list_data_type)
.len(9)
.add_buffer(value_offsets)
Expand Down
6 changes: 6 additions & 0 deletions arrow-ipc/src/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,12 @@ mod tests {
Field::new("utf8", DataType::Utf8, false),
Field::new("binary", DataType::Binary, false),
Field::new_list("list[u8]", Field::new("item", DataType::UInt8, false), true),
Field::new_fixed_size_list(
"fixed_size_list[u8]",
Field::new("item", DataType::UInt8, false),
2,
true,
),
Field::new_list(
"list[struct<float32, int32, bool>]",
Field::new_struct(
Expand Down
18 changes: 18 additions & 0 deletions arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,24 @@ impl DataType {
pub fn new_list(data_type: DataType, nullable: bool) -> Self {
DataType::List(Arc::new(Field::new_list_field(data_type, nullable)))
}

/// Create a [`DataType::LargeList`] with elements of the specified type
/// and nullability, and conventionally named inner [`Field`] (`"item"`).
///
/// To specify field level metadata, construct the inner [`Field`]
/// directly via [`Field::new`] or [`Field::new_list_field`].
pub fn new_large_list(data_type: DataType, nullable: bool) -> Self {
DataType::LargeList(Arc::new(Field::new_list_field(data_type, nullable)))
}

/// Create a [`DataType::FixedSizeList`] with elements of the specified type, size
/// and nullability, and conventionally named inner [`Field`] (`"item"`).
///
/// To specify field level metadata, construct the inner [`Field`]
/// directly via [`Field::new`] or [`Field::new_list_field`].
pub fn new_fixed_size_list(data_type: DataType, size: i32, nullable: bool) -> Self {
DataType::FixedSizeList(Arc::new(Field::new_list_field(data_type, nullable)), size)
}
}

/// The maximum precision for [DataType::Decimal128] values
Expand Down
15 changes: 15 additions & 0 deletions arrow-schema/src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,21 @@ impl Field {
Self::new(name, DataType::LargeList(value.into()), nullable)
}

/// Construct a [`Field`] whose data type is [`DataType::FixedSizeList`]
///
/// - `name`: name given to the resulting [`DataType::FixedSizeList`] field
/// - `value`: description of each list element, converted into a [`FieldRef`]
/// - `size`: the list size passed to [`DataType::FixedSizeList`]
/// - `nullable`: whether the [`DataType::FixedSizeList`] array is nullable
pub fn new_fixed_size_list(
    name: impl Into<String>,
    value: impl Into<FieldRef>,
    size: i32,
    nullable: bool,
) -> Self {
    // Convert the element description, then wrap it in the list data type.
    let element: FieldRef = value.into();
    let list_type = DataType::FixedSizeList(element, size);
    Self::new(name, list_type, nullable)
}

/// Create a new [`Field`] with [`DataType::Map`]
///
/// - `name`: the name of the [`DataType::Map`] field
Expand Down
6 changes: 1 addition & 5 deletions arrow-schema/src/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -444,11 +444,7 @@ mod tests {
Field::new("floats", DataType::Struct(floats.clone()), true),
true,
),
Field::new(
"f",
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3),
false,
),
Field::new_fixed_size_list("f", Field::new("item", DataType::Int32, false), 3, false),
Field::new_map(
"g",
"entries",
Expand Down
6 changes: 2 additions & 4 deletions arrow-select/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1261,8 +1261,7 @@ mod tests {
.add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8]))
.build()
.unwrap();
let list_data_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3);
let list_data_type = DataType::new_fixed_size_list(DataType::Int32, 3, false);
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_child_data(value_data)
Expand Down Expand Up @@ -1318,8 +1317,7 @@ mod tests {
bit_util::set_bit(&mut null_bits, 3);
bit_util::set_bit(&mut null_bits, 4);

let list_data_type =
DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2);
let list_data_type = DataType::new_fixed_size_list(DataType::Int32, 2, false);
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data)
Expand Down
Loading