Skip to content

Commit 2bcc0cf

Browse files
authored
initial commit (#1564)
1 parent 6083a91 commit 2bcc0cf

File tree

4 files changed

+302
-9
lines changed

4 files changed

+302
-9
lines changed

arrow-pyarrow-integration-testing/tests/test_sql.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,11 @@ def assert_pyarrow_leak():
6161
pa.decimal128(19, 4),
6262
pa.string(),
6363
pa.binary(),
64+
pa.binary(10),
6465
pa.large_string(),
6566
pa.large_binary(),
6667
pa.list_(pa.int32()),
68+
pa.list_(pa.int32(), 2),
6769
pa.large_list(pa.uint16()),
6870
pa.struct(
6971
[
@@ -85,8 +87,6 @@ def assert_pyarrow_leak():
8587
_unsupported_pyarrow_types = [
8688
pa.decimal256(76, 38),
8789
pa.duration("s"),
88-
pa.binary(10),
89-
pa.list_(pa.int32(), 2),
9090
pa.map_(pa.string(), pa.int32()),
9191
pa.union(
9292
[pa.field("a", pa.binary(10)), pa.field("b", pa.string())],
@@ -190,6 +190,29 @@ def test_time32_python():
190190
del b
191191
del expected
192192

193+
def test_binary_array():
    """
    Python -> Rust -> Python

    Sends a variable-length binary array containing one null through
    ``rust.round_trip_array`` and checks that the returned array validates
    and has the same values and type as the array that was sent.
    """
    original = pa.array(["a", None, "bb", "ccc"], pa.binary())
    returned = rust.round_trip_array(original)
    returned.validate(full=True)
    assert original.to_pylist() == returned.to_pylist()
    assert original.type == returned.type
    # Drop both references explicitly before the test function returns.
    del original, returned
204+
205+
def test_fixed_len_binary_array():
    """
    Python -> Rust -> Python

    Sends a ``pa.binary(3)`` array containing one null through
    ``rust.round_trip_array`` and checks that the returned array validates
    and has the same values and type as the array that was sent.
    """
    original = pa.array(["aaa", None, "bbb", "ccc"], pa.binary(3))
    returned = rust.round_trip_array(original)
    returned.validate(full=True)
    assert original.to_pylist() == returned.to_pylist()
    assert original.type == returned.type
    # Drop both references explicitly before the test function returns.
    del original, returned
193216

194217
def test_list_array():
195218
"""
@@ -203,6 +226,17 @@ def test_list_array():
203226
del a
204227
del b
205228

229+
def test_fixed_len_list_array():
    """
    Python -> Rust -> Python

    Sends a ``pa.list_(pa.int64(), 2)`` array containing one null through
    ``rust.round_trip_array`` and checks that the returned array validates
    and has the same values and type as the array that was sent.
    """
    original = pa.array([[1, 2], None, [3, 4], [5, 6]], pa.list_(pa.int64(), 2))
    returned = rust.round_trip_array(original)
    returned.validate(full=True)
    assert original.to_pylist() == returned.to_pylist()
    assert original.type == returned.type
    # Drop both references explicitly before the test function returns.
    del original, returned
206240

207241
def test_timestamp_python():
208242
"""

arrow/src/array/ffi.rs

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,14 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
4545

4646
#[cfg(test)]
4747
mod tests {
48-
use crate::array::{DictionaryArray, Int32Array, StringArray};
48+
use crate::array::{DictionaryArray, FixedSizeListArray, Int32Array, StringArray};
49+
use crate::buffer::Buffer;
4950
use crate::error::Result;
51+
use crate::util::bit_util;
5052
use crate::{
5153
array::{
52-
Array, ArrayData, BooleanArray, Int64Array, StructArray, UInt32Array,
53-
UInt64Array,
54+
Array, ArrayData, BooleanArray, FixedSizeBinaryArray, Int64Array,
55+
StructArray, UInt32Array, UInt64Array,
5456
},
5557
datatypes::{DataType, Field},
5658
ffi::ArrowArray,
@@ -149,4 +151,112 @@ mod tests {
149151
let data = array.data();
150152
test_round_trip(data)
151153
}
154+
155+
/// Builds a null-free `FixedSizeBinaryArray` of three 3-byte values and
/// hands its data to this module's `test_round_trip` helper.
#[test]
fn test_fixed_size_binary() -> Result<()> {
    // Each element repeats a single byte three times: [10,10,10], [20,20,20], [30,30,30].
    let rows = [10u8, 20, 30].iter().map(|&byte| vec![byte; 3]);
    let fixed_binary = FixedSizeBinaryArray::try_from_iter(rows)?;

    test_round_trip(fixed_binary.data())
}
163+
164+
/// Builds a `FixedSizeBinaryArray` of 3-byte values in which slots 0, 2 and 5
/// are `None`, then hands its data to this module's `test_round_trip` helper.
#[test]
fn test_fixed_size_binary_with_nulls() -> Result<()> {
    let sparse_rows: Vec<Option<Vec<u8>>> = vec![
        None,
        Some(vec![10; 3]),
        None,
        Some(vec![20; 3]),
        Some(vec![30; 3]),
        None,
    ];
    let fixed_binary =
        FixedSizeBinaryArray::try_from_sparse_iter(sparse_rows.into_iter())?;

    test_round_trip(fixed_binary.data())
}
179+
180+
/// Builds a null-free `FixedSizeListArray` (three lists of three `Int64`
/// values each) and hands its data to this module's `test_round_trip` helper.
#[test]
fn test_fixed_size_list() -> Result<()> {
    // A range is already an iterator, so it can be collected directly.
    let values: Vec<i64> = (0..9).collect();
    let value_data = ArrayData::builder(DataType::Int64)
        // Derive the child length from the values so the two cannot drift apart.
        .len(values.len())
        .add_buffer(Buffer::from_slice_ref(&values))
        .build()?;

    // 9 child values / 3 values per list = 3 list entries.
    let list_data_type =
        DataType::FixedSizeList(Box::new(Field::new("f", DataType::Int64, false)), 3);
    let list_data = ArrayData::builder(list_data_type)
        .len(3)
        .add_child_data(value_data)
        .build()?;
    let array = FixedSizeListArray::from(list_data);

    test_round_trip(array.data())
}
198+
199+
/// Builds a `FixedSizeListArray` of eight 2-element `Int16` lists with a
/// validity bitmap in which only entries 1, 2 and 6 are set, then hands its
/// data to this module's `test_round_trip` helper.
#[test]
fn test_fixed_size_list_with_nulls() -> Result<()> {
    // Validity bitmap 0100 0110: bits 1, 2 and 6 are set.
    let mut validity_bits: [u8; 1] = [0; 1];
    for valid_index in [1, 2, 6] {
        bit_util::set_bit(&mut validity_bits, valid_index);
    }

    // A range is already an iterator, so it can be collected directly.
    let values: Vec<i16> = (0..16).collect();
    let value_data = ArrayData::builder(DataType::Int16)
        // Derive the child length from the values so the two cannot drift apart.
        .len(values.len())
        .add_buffer(Buffer::from_slice_ref(&values))
        .build()?;

    // 16 child values / 2 values per list = 8 list entries.
    let list_data_type =
        DataType::FixedSizeList(Box::new(Field::new("f", DataType::Int16, false)), 2);
    let list_data = ArrayData::builder(list_data_type)
        .len(8)
        .null_bit_buffer(Buffer::from(validity_bits))
        .add_child_data(value_data)
        .build()?;
    let array = FixedSizeListArray::from(list_data);

    test_round_trip(array.data())
}
224+
225+
/// Builds a `FixedSizeListArray` whose child is a variable-length `List<Int32>`
/// array, with a validity bitmap in which only entry 2 is set, then hands its
/// data to this module's `test_round_trip` helper.
#[test]
fn test_fixed_size_list_nested() -> Result<()> {
    // A range is already an iterator, so it can be collected directly.
    let values: Vec<i32> = (0..16).collect();
    let value_data = ArrayData::builder(DataType::Int32)
        // Derive the child length from the values so the two cannot drift apart.
        .len(values.len())
        .add_buffer(Buffer::from_slice_ref(&values))
        .build()?;

    // Nine offsets delimit eight inner lists of two values each.
    let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
    let value_offsets = Buffer::from_slice_ref(&offsets);
    let inner_list_data_type =
        DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
    // The inner type is cloned because it is reused below as the outer list's field type.
    let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
        .len(8)
        .add_buffer(value_offsets)
        .add_child_data(value_data)
        .build()?;

    // Validity bitmap 0000 0100: only bit 2 is set.
    let mut validity_bits: [u8; 1] = [0; 1];
    bit_util::set_bit(&mut validity_bits, 2);

    // 8 inner lists / 2 per outer entry = 4 outer entries.
    let list_data_type = DataType::FixedSizeList(
        Box::new(Field::new("f", inner_list_data_type, false)),
        2,
    );
    let list_data = ArrayData::builder(list_data_type)
        .len(4)
        .null_bit_buffer(Buffer::from(validity_bits))
        .add_child_data(inner_list_data)
        .build()?;

    let array = FixedSizeListArray::from(list_data);

    test_round_trip(array.data())
}
152262
}

arrow/src/datatypes/ffi.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,23 @@ impl TryFrom<&FFI_ArrowSchema> for DataType {
6767
// Parametrized types, requiring string parse
6868
other => {
6969
match other.splitn(2, ':').collect::<Vec<&str>>().as_slice() {
70+
// FixedSizeBinary type in format "w:num_bytes"
71+
["w", num_bytes] => {
72+
let parsed_num_bytes = num_bytes.parse::<i32>().map_err(|_| {
73+
ArrowError::CDataInterface(
74+
"FixedSizeBinary requires an integer parameter representing number of bytes per element".to_string())
75+
})?;
76+
DataType::FixedSizeBinary(parsed_num_bytes)
77+
},
78+
// FixedSizeList type in format "+w:num_elems"
79+
["+w", num_elems] => {
80+
let c_child = c_schema.child(0);
81+
let parsed_num_elems = num_elems.parse::<i32>().map_err(|_| {
82+
ArrowError::CDataInterface(
83+
"The FixedSizeList type requires an integer parameter representing number of elements per list".to_string())
84+
})?;
85+
DataType::FixedSizeList(Box::new(Field::try_from(c_child)?), parsed_num_elems)
86+
},
7087
// Decimal types in format "d:precision,scale" or "d:precision,scale,bitWidth"
7188
["d", extra] => {
7289
match extra.splitn(3, ',').collect::<Vec<&str>>().as_slice() {
@@ -178,7 +195,9 @@ impl TryFrom<&DataType> for FFI_ArrowSchema {
178195
let format = get_format_string(dtype)?;
179196
// allocate and hold the children
180197
let children = match dtype {
181-
DataType::List(child) | DataType::LargeList(child) => {
198+
DataType::List(child)
199+
| DataType::LargeList(child)
200+
| DataType::FixedSizeList(child, _) => {
182201
vec![FFI_ArrowSchema::try_from(child.as_ref())?]
183202
}
184203
DataType::Struct(fields) => fields
@@ -215,6 +234,8 @@ fn get_format_string(dtype: &DataType) -> Result<String> {
215234
DataType::LargeBinary => Ok("Z".to_string()),
216235
DataType::Utf8 => Ok("u".to_string()),
217236
DataType::LargeUtf8 => Ok("U".to_string()),
237+
DataType::FixedSizeBinary(num_bytes) => Ok(format!("w:{}", num_bytes)),
238+
DataType::FixedSizeList(_, num_elems) => Ok(format!("+w:{}", num_elems)),
218239
DataType::Decimal(precision, scale) => Ok(format!("d:{},{}", precision, scale)),
219240
DataType::Date32 => Ok("tdD".to_string()),
220241
DataType::Date64 => Ok("tdm".to_string()),
@@ -325,6 +346,11 @@ mod tests {
325346
round_trip_type(DataType::Float64)?;
326347
round_trip_type(DataType::Date64)?;
327348
round_trip_type(DataType::Time64(TimeUnit::Nanosecond))?;
349+
round_trip_type(DataType::FixedSizeBinary(12))?;
350+
round_trip_type(DataType::FixedSizeList(
351+
Box::new(Field::new("a", DataType::Int64, false)),
352+
5,
353+
))?;
328354
round_trip_type(DataType::Utf8)?;
329355
round_trip_type(DataType::List(Box::new(Field::new(
330356
"a",

0 commit comments

Comments
 (0)