Skip to content

Commit 3fed612

Browse files
authored
Add CI check for full validation mode (#1546)
* Add force_validate feature * Disable some redundant checks * Add issue link * Add test with force_validate feature flag * fix up message * disable due to #1547 * disable ipc test failure * fix clippy * Fix doctest to pass with force_validate enabled
1 parent d701939 commit 3fed612

File tree

10 files changed

+54
-5
lines changed

10 files changed

+54
-5
lines changed

.github/workflows/rust.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ jobs:
111111
112112
# Switch to arrow crate
113113
cd arrow
114-
# re-run tests on arrow crate with additional features
114+
# re-run tests on arrow crate to ensure
115+
# all arrays are created correctly
116+
cargo test --features=force_validate
115117
cargo test --features=prettyprint
116118
# run test on arrow crate with minimal set of features
117119
cargo test --no-default-features

arrow/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ prettyprint = ["comfy-table"]
7171
# target without assuming an environment containing JavaScript.
7272
test_utils = ["rand"]
7373
pyarrow = ["pyo3"]
74+
# force_validate runs full data validation for all arrays that are created
75+
# this is not enabled by default as it is too computationally expensive
76+
# but is run as part of our CI checks
77+
force_validate = []
7478

7579
[dev-dependencies]
7680
rand = "0.8"

arrow/src/array/array_binary.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,9 @@ mod tests {
14261426
#[should_panic(
14271427
expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
14281428
)]
1429+
// Different error messages, so skip for now
1430+
// https://github.com/apache/arrow-rs/issues/1545
1431+
#[cfg(not(feature = "force_validate"))]
14291432
fn test_fixed_size_binary_array_from_incorrect_list_array() {
14301433
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
14311434
let values_data = ArrayData::builder(DataType::UInt32)

arrow/src/array/array_boolean.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,9 @@ mod tests {
350350
#[test]
351351
#[should_panic(expected = "BooleanArray data should contain a single buffer only \
352352
(values buffer)")]
353+
// Different error messages, so skip for now
354+
// https://github.com/apache/arrow-rs/issues/1545
355+
#[cfg(not(feature = "force_validate"))]
353356
fn test_boolean_array_invalid_buffer_len() {
354357
let data = unsafe {
355358
ArrayData::builder(DataType::Boolean)

arrow/src/array/array_list.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,9 @@ mod tests {
777777
#[should_panic(
778778
expected = "FixedSizeListArray child array length should be a multiple of 3"
779779
)]
780+
// Different error messages, so skip for now
781+
// https://github.com/apache/arrow-rs/issues/1545
782+
#[cfg(not(feature = "force_validate"))]
780783
fn test_fixed_size_list_array_unequal_children() {
781784
// Construct a value array
782785
let value_data = ArrayData::builder(DataType::Int32)
@@ -1065,6 +1068,9 @@ mod tests {
10651068
#[should_panic(
10661069
expected = "ListArray data should contain a single buffer only (value offsets)"
10671070
)]
1071+
// Different error messages, so skip for now
1072+
// https://github.com/apache/arrow-rs/issues/1545
1073+
#[cfg(not(feature = "force_validate"))]
10681074
fn test_list_array_invalid_buffer_len() {
10691075
let value_data = unsafe {
10701076
ArrayData::builder(DataType::Int32)
@@ -1087,6 +1093,9 @@ mod tests {
10871093
#[should_panic(
10881094
expected = "ListArray should contain a single child array (values array)"
10891095
)]
1096+
// Different error messages, so skip for now
1097+
// https://github.com/apache/arrow-rs/issues/1545
1098+
#[cfg(not(feature = "force_validate"))]
10901099
fn test_list_array_invalid_child_array_len() {
10911100
let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]);
10921101
let list_data_type =
@@ -1137,6 +1146,9 @@ mod tests {
11371146

11381147
#[test]
11391148
#[should_panic(expected = "memory is not aligned")]
1149+
// Different error messages, so skip for now
1150+
// https://github.com/apache/arrow-rs/issues/1545
1151+
#[cfg(not(feature = "force_validate"))]
11401152
fn test_list_array_alignment() {
11411153
let ptr = alloc::allocate_aligned::<u8>(8);
11421154
let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) };

arrow/src/array/array_primitive.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,9 @@ mod tests {
10281028
#[test]
10291029
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
10301030
(values buffer)")]
1031+
// Different error messages, so skip for now
1032+
// https://github.com/apache/arrow-rs/issues/1545
1033+
#[cfg(not(feature = "force_validate"))]
10311034
fn test_primitive_array_invalid_buffer_len() {
10321035
let buffer = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4]);
10331036
let data = unsafe {

arrow/src/array/data.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ impl ArrayData {
272272
/// Note: This is a low level API and most users of the arrow
273273
/// crate should create arrays using the methods in the `array`
274274
/// module.
275+
#[allow(clippy::let_and_return)]
275276
pub unsafe fn new_unchecked(
276277
data_type: DataType,
277278
len: usize,
@@ -286,15 +287,20 @@ impl ArrayData {
286287
Some(null_count) => null_count,
287288
};
288289
let null_bitmap = null_bit_buffer.map(Bitmap::from);
289-
Self {
290+
let new_self = Self {
290291
data_type,
291292
len,
292293
null_count,
293294
offset,
294295
buffers,
295296
child_data,
296297
null_bitmap,
297-
}
298+
};
299+
300+
// Provide a force_validate mode
301+
#[cfg(feature = "force_validate")]
302+
new_self.validate_full().unwrap();
303+
new_self
298304
}
299305

300306
/// Create a new ArrayData, validating that the provided buffers
@@ -2340,7 +2346,7 @@ mod tests {
23402346

23412347
#[test]
23422348
#[should_panic(
2343-
expected = "child #0 invalid: Invalid argument error: Value at position 1 out of bounds: -1 (should be in [0, 1])"
2349+
expected = "Value at position 1 out of bounds: -1 (should be in [0, 1])"
23442350
)]
23452351
/// test that children are validated recursively (aka bugs in child data of struct also are flagged)
23462352
fn test_validate_recursive() {

arrow/src/compute/kernels/filter.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,6 +1692,9 @@ mod tests {
16921692
}
16931693

16941694
#[test]
1695+
// Fails when validation enabled
1696+
// https://github.com/apache/arrow-rs/issues/1547
1697+
#[cfg(not(feature = "force_validate"))]
16951698
fn test_filter_union_array_sparse() {
16961699
let mut builder = UnionBuilder::new_sparse(3);
16971700
builder.append::<Int32Type>("A", 1).unwrap();
@@ -1703,6 +1706,9 @@ mod tests {
17031706
}
17041707

17051708
#[test]
1709+
// Fails when validation enabled
1710+
// https://github.com/apache/arrow-rs/issues/1547
1711+
#[cfg(not(feature = "force_validate"))]
17061712
fn test_filter_union_array_sparse_with_nulls() {
17071713
let mut builder = UnionBuilder::new_sparse(4);
17081714
builder.append::<Int32Type>("A", 1).unwrap();

arrow/src/compute/kernels/substring.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,23 @@ fn generic_substring<OffsetSize: StringOffsetSizeTrait>(
109109
///
110110
/// # Warning
111111
///
112-
/// This function **might** return in invalid utf-8 format if the character length falls on a non-utf8 boundary.
112+
/// This function **might** return in invalid utf-8 format if the
113+
/// character length falls on a non-utf8 boundary, which we
114+
/// [hope to fix](https://github.com/apache/arrow-rs/issues/1531)
115+
/// in a future release.
116+
///
113117
/// ## Example of getting an invalid substring
114118
/// ```
119+
/// # // Doesn't pass due to https://github.com/apache/arrow-rs/issues/1531
120+
/// # #[cfg(not(feature = "force_validate"))]
121+
/// # {
115122
/// # use arrow::array::StringArray;
116123
/// # use arrow::compute::kernels::substring::substring;
117124
/// let array = StringArray::from(vec![Some("E=mc²")]);
118125
/// let result = substring(&array, -1, &None).unwrap();
119126
/// let result = result.as_any().downcast_ref::<StringArray>().unwrap();
120127
/// assert_eq!(result.value(0).as_bytes(), &[0x00B2]); // invalid utf-8 format
128+
/// # }
121129
/// ```
122130
///
123131
/// # Error

arrow/src/ipc/reader.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,8 @@ mod tests {
11071107
}
11081108

11091109
#[test]
1110+
// https://github.com/apache/arrow-rs/issues/1548
1111+
#[cfg(not(feature = "force_validate"))]
11101112
fn projection_should_work() {
11111113
// complementary to the previous test
11121114
let testdata = crate::util::test_util::arrow_test_data();

0 commit comments

Comments
 (0)