Skip to content

Commit 21693a4

Browse files
authored
fix a bug in variable sized equality (#1209)
A missing validity buffer was being treated as all values being null, rather than all values being valid, causing equality to fail on some equivalent string and binary arrays.
1 parent b2416ff commit 21693a4

File tree

2 files changed

+41
-7
lines changed

2 files changed

+41
-7
lines changed

arrow/src/array/equal/mod.rs

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,10 +304,10 @@ mod tests {
304304
use std::sync::Arc;
305305

306306
use crate::array::{
307-
array::Array, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait, BooleanArray,
308-
DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray,
309-
Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray,
310-
StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
307+
array::Array, ArrayData, ArrayDataBuilder, ArrayRef, BinaryOffsetSizeTrait,
308+
BooleanArray, DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder,
309+
GenericBinaryArray, Int32Builder, ListBuilder, NullArray, PrimitiveBuilder,
310+
StringArray, StringDictionaryBuilder, StringOffsetSizeTrait, StructArray,
311311
};
312312
use crate::array::{GenericStringArray, Int32Array};
313313
use crate::buffer::Buffer;
@@ -1297,4 +1297,38 @@ mod tests {
12971297
);
12981298
test_equal(&a, &b, false);
12991299
}
1300+
1301+
#[test]
1302+
fn test_non_null_empty_strings() {
1303+
let s = StringArray::from(vec![Some(""), Some(""), Some("")]);
1304+
1305+
let string1 = s.data();
1306+
1307+
let string2 = ArrayData::builder(DataType::Utf8)
1308+
.len(string1.len())
1309+
.buffers(string1.buffers().to_vec())
1310+
.build()
1311+
.unwrap();
1312+
1313+
// string2 is identical to string1 except that it has no validity buffer but since there
1314+
// are no nulls, string1 and string2 are equal
1315+
test_equal(string1, &string2, true);
1316+
}
1317+
1318+
#[test]
1319+
fn test_null_empty_strings() {
1320+
let s = StringArray::from(vec![Some(""), None, Some("")]);
1321+
1322+
let string1 = s.data();
1323+
1324+
let string2 = ArrayData::builder(DataType::Utf8)
1325+
.len(string1.len())
1326+
.buffers(string1.buffers().to_vec())
1327+
.build()
1328+
.unwrap();
1329+
1330+
// string2 is identical to string1 except that it has no validity buffer since string1 has
1331+
// nulls in it, string1 and string2 are not equal
1332+
test_equal(string1, &string2, false);
1333+
}
13001334
}

arrow/src/array/equal/variable_size.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,13 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
8686
let lhs_pos = lhs_start + i;
8787
let rhs_pos = rhs_start + i;
8888

89-
// the null bits can still be `None`, so we don't unwrap
89+
// the null bits can still be `None`, indicating that the value is valid.
9090
let lhs_is_null = !lhs_nulls
9191
.map(|v| get_bit(v.as_slice(), lhs.offset() + lhs_pos))
92-
.unwrap_or(false);
92+
.unwrap_or(true);
9393
let rhs_is_null = !rhs_nulls
9494
.map(|v| get_bit(v.as_slice(), rhs.offset() + rhs_pos))
95-
.unwrap_or(false);
95+
.unwrap_or(true);
9696

9797
lhs_is_null
9898
|| (lhs_is_null == rhs_is_null)

0 commit comments

Comments
 (0)