Skip to content

Commit a132be6

Browse files
alambbjchambers
andcommitted
Fix validation for offsets of StructArrays (#942)
* reproduce validation error * Fix validation bug Co-authored-by: Ben Chambers <bjchambers@gmail.com>
1 parent 1af9ca5 commit a132be6

File tree

1 file changed

+103
-5
lines changed

1 file changed

+103
-5
lines changed

arrow/src/array/data.rs

Lines changed: 103 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -790,12 +790,11 @@ impl ArrayData {
790790
for (i, field) in fields.iter().enumerate() {
791791
let field_data = self.get_valid_child_data(i, field.data_type())?;
792792

793-
// C++ does this check, but it is not clear why
794-
// field_data checks only len, but self checks len+offset
795-
if field_data.len < (self.len + self.offset) {
793+
// Ensure child field has sufficient size
794+
if field_data.len < self.len {
796795
return Err(ArrowError::InvalidArgumentError(format!(
797796
"{} child array #{} for field {} has length smaller than expected for struct array ({} < {})",
798-
self.data_type, i, field.name(), field_data.len, self.len + self.offset
797+
self.data_type, i, field.name(), field_data.len, self.len
799798
)));
800799
}
801800
}
@@ -1114,7 +1113,9 @@ impl ArrayDataBuilder {
11141113
mod tests {
11151114
use super::*;
11161115

1117-
use crate::array::{Array, Int32Array, StringArray};
1116+
use crate::array::{
1117+
Array, BooleanBuilder, Int32Array, Int32Builder, StringArray, StructBuilder,
1118+
};
11181119
use crate::buffer::Buffer;
11191120
use crate::datatypes::Field;
11201121
use crate::util::bit_util;
@@ -1640,4 +1641,101 @@ mod tests {
16401641
fn make_f32_buffer(n: usize) -> Buffer {
16411642
Buffer::from_slice_ref(&vec![42f32; n])
16421643
}
1644+
1645+
#[test]
1646+
fn test_try_new_sliced_struct() {
1647+
let mut builder = StructBuilder::new(
1648+
vec![
1649+
Field::new("a", DataType::Int32, true),
1650+
Field::new("b", DataType::Boolean, true),
1651+
],
1652+
vec![
1653+
Box::new(Int32Builder::new(5)),
1654+
Box::new(BooleanBuilder::new(5)),
1655+
],
1656+
);
1657+
1658+
// struct[0] = { a: 10, b: true }
1659+
builder
1660+
.field_builder::<Int32Builder>(0)
1661+
.unwrap()
1662+
.append_option(Some(10))
1663+
.unwrap();
1664+
builder
1665+
.field_builder::<BooleanBuilder>(1)
1666+
.unwrap()
1667+
.append_option(Some(true))
1668+
.unwrap();
1669+
builder.append(true).unwrap();
1670+
1671+
// struct[1] = null
1672+
builder
1673+
.field_builder::<Int32Builder>(0)
1674+
.unwrap()
1675+
.append_option(None)
1676+
.unwrap();
1677+
builder
1678+
.field_builder::<BooleanBuilder>(1)
1679+
.unwrap()
1680+
.append_option(None)
1681+
.unwrap();
1682+
builder.append(false).unwrap();
1683+
1684+
// struct[2] = { a: null, b: false }
1685+
builder
1686+
.field_builder::<Int32Builder>(0)
1687+
.unwrap()
1688+
.append_option(None)
1689+
.unwrap();
1690+
builder
1691+
.field_builder::<BooleanBuilder>(1)
1692+
.unwrap()
1693+
.append_option(Some(false))
1694+
.unwrap();
1695+
builder.append(true).unwrap();
1696+
1697+
// struct[3] = { a: 21, b: null }
1698+
builder
1699+
.field_builder::<Int32Builder>(0)
1700+
.unwrap()
1701+
.append_option(Some(21))
1702+
.unwrap();
1703+
builder
1704+
.field_builder::<BooleanBuilder>(1)
1705+
.unwrap()
1706+
.append_option(None)
1707+
.unwrap();
1708+
builder.append(true).unwrap();
1709+
1710+
// struct[4] = { a: 18, b: false }
1711+
builder
1712+
.field_builder::<Int32Builder>(0)
1713+
.unwrap()
1714+
.append_option(Some(18))
1715+
.unwrap();
1716+
builder
1717+
.field_builder::<BooleanBuilder>(1)
1718+
.unwrap()
1719+
.append_option(Some(false))
1720+
.unwrap();
1721+
builder.append(true).unwrap();
1722+
1723+
let struct_array = builder.finish();
1724+
let struct_array_slice = struct_array.slice(1, 3);
1725+
let struct_array_data = struct_array_slice.data();
1726+
1727+
let cloned_data = ArrayData::try_new(
1728+
struct_array_slice.data_type().clone(),
1729+
struct_array_slice.len(),
1730+
None, // force new to compute the number of null bits
1731+
struct_array_data.null_buffer().cloned(),
1732+
struct_array_slice.offset(),
1733+
struct_array_data.buffers().to_vec(),
1734+
struct_array_data.child_data().to_vec(),
1735+
)
1736+
.unwrap();
1737+
let cloned = crate::array::make_array(cloned_data);
1738+
1739+
assert_eq!(&struct_array_slice, &cloned);
1740+
}
16431741
}

0 commit comments

Comments
 (0)