@@ -24,6 +24,7 @@ use arrow::array::{
24
24
use arrow:: buffer:: { OffsetBuffer , ScalarBuffer } ;
25
25
use arrow:: datatypes:: { ByteArrayType , DataType , GenericBinaryType } ;
26
26
use datafusion_common:: utils:: proxy:: VecAllocExt ;
27
+ use datafusion_common:: { DataFusionError , Result } ;
27
28
use datafusion_physical_expr_common:: binary_map:: { OutputType , INITIAL_BUFFER_CAPACITY } ;
28
29
use itertools:: izip;
29
30
use std:: mem:: size_of;
80
81
self . do_equal_to_inner ( lhs_row, array, rhs_row)
81
82
}
82
83
83
- fn append_val_inner < B > ( & mut self , array : & ArrayRef , row : usize )
84
+ fn append_val_inner < B > ( & mut self , array : & ArrayRef , row : usize ) -> Result < ( ) >
84
85
where
85
86
B : ByteArrayType ,
86
87
{
92
93
self . offsets . push ( O :: usize_as ( offset) ) ;
93
94
} else {
94
95
self . nulls . append ( false ) ;
95
- self . do_append_val_inner ( arr, row) ;
96
+ self . do_append_val_inner ( arr, row) ? ;
96
97
}
98
+
99
+ Ok ( ( ) )
97
100
}
98
101
99
102
fn vectorized_equal_to_inner < B > (
@@ -123,7 +126,11 @@ where
123
126
}
124
127
}
125
128
126
- fn vectorized_append_inner < B > ( & mut self , array : & ArrayRef , rows : & [ usize ] )
129
+ fn vectorized_append_inner < B > (
130
+ & mut self ,
131
+ array : & ArrayRef ,
132
+ rows : & [ usize ] ,
133
+ ) -> Result < ( ) >
127
134
where
128
135
B : ByteArrayType ,
129
136
{
@@ -141,22 +148,14 @@ where
141
148
match all_null_or_non_null {
142
149
None => {
143
150
for & row in rows {
144
- if arr. is_null ( row) {
145
- self . nulls . append ( true ) ;
146
- // nulls need a zero length in the offset buffer
147
- let offset = self . buffer . len ( ) ;
148
- self . offsets . push ( O :: usize_as ( offset) ) ;
149
- } else {
150
- self . nulls . append ( false ) ;
151
- self . do_append_val_inner ( arr, row) ;
152
- }
151
+ self . append_val_inner :: < B > ( array, row) ?
153
152
}
154
153
}
155
154
156
155
Some ( true ) => {
157
156
self . nulls . append_n ( rows. len ( ) , false ) ;
158
157
for & row in rows {
159
- self . do_append_val_inner ( arr, row) ;
158
+ self . do_append_val_inner ( arr, row) ? ;
160
159
}
161
160
}
162
161
@@ -168,6 +167,8 @@ where
168
167
self . offsets . resize ( new_len, O :: usize_as ( offset) ) ;
169
168
}
170
169
}
170
+
171
+ Ok ( ( ) )
171
172
}
172
173
173
174
fn do_equal_to_inner < B > (
@@ -188,20 +189,26 @@ where
188
189
self . value ( lhs_row) == ( array. value ( rhs_row) . as_ref ( ) as & [ u8 ] )
189
190
}
190
191
191
- fn do_append_val_inner < B > ( & mut self , array : & GenericByteArray < B > , row : usize )
192
+ fn do_append_val_inner < B > (
193
+ & mut self ,
194
+ array : & GenericByteArray < B > ,
195
+ row : usize ,
196
+ ) -> Result < ( ) >
192
197
where
193
198
B : ByteArrayType ,
194
199
{
195
200
let value: & [ u8 ] = array. value ( row) . as_ref ( ) ;
196
201
self . buffer . append_slice ( value) ;
197
202
198
- assert ! (
199
- self . buffer. len( ) <= self . max_buffer_size,
200
- "offset overflow, buffer size > {}" ,
201
- self . max_buffer_size
202
- ) ;
203
+ if self . buffer . len ( ) > self . max_buffer_size {
204
+ return Err ( DataFusionError :: Execution ( format ! (
205
+ "offset overflow, buffer size > {}" ,
206
+ self . max_buffer_size
207
+ ) ) ) ;
208
+ }
203
209
204
210
self . offsets . push ( O :: usize_as ( self . buffer . len ( ) ) ) ;
211
+ Ok ( ( ) )
205
212
}
206
213
207
214
/// return the current value of the specified row irrespective of null
@@ -238,25 +245,27 @@ where
238
245
}
239
246
}
240
247
241
- fn append_val ( & mut self , column : & ArrayRef , row : usize ) {
248
+ fn append_val ( & mut self , column : & ArrayRef , row : usize ) -> Result < ( ) > {
242
249
// Sanity array type
243
250
match self . output_type {
244
251
OutputType :: Binary => {
245
252
debug_assert ! ( matches!(
246
253
column. data_type( ) ,
247
254
DataType :: Binary | DataType :: LargeBinary
248
255
) ) ;
249
- self . append_val_inner :: < GenericBinaryType < O > > ( column, row)
256
+ self . append_val_inner :: < GenericBinaryType < O > > ( column, row) ?
250
257
}
251
258
OutputType :: Utf8 => {
252
259
debug_assert ! ( matches!(
253
260
column. data_type( ) ,
254
261
DataType :: Utf8 | DataType :: LargeUtf8
255
262
) ) ;
256
- self . append_val_inner :: < GenericStringType < O > > ( column, row)
263
+ self . append_val_inner :: < GenericStringType < O > > ( column, row) ?
257
264
}
258
265
_ => unreachable ! ( "View types should use `ArrowBytesViewMap`" ) ,
259
266
} ;
267
+
268
+ Ok ( ( ) )
260
269
}
261
270
262
271
fn vectorized_equal_to (
@@ -296,24 +305,26 @@ where
296
305
}
297
306
}
298
307
299
- fn vectorized_append ( & mut self , column : & ArrayRef , rows : & [ usize ] ) {
308
+ fn vectorized_append ( & mut self , column : & ArrayRef , rows : & [ usize ] ) -> Result < ( ) > {
300
309
match self . output_type {
301
310
OutputType :: Binary => {
302
311
debug_assert ! ( matches!(
303
312
column. data_type( ) ,
304
313
DataType :: Binary | DataType :: LargeBinary
305
314
) ) ;
306
- self . vectorized_append_inner :: < GenericBinaryType < O > > ( column, rows)
315
+ self . vectorized_append_inner :: < GenericBinaryType < O > > ( column, rows) ?
307
316
}
308
317
OutputType :: Utf8 => {
309
318
debug_assert ! ( matches!(
310
319
column. data_type( ) ,
311
320
DataType :: Utf8 | DataType :: LargeUtf8
312
321
) ) ;
313
- self . vectorized_append_inner :: < GenericStringType < O > > ( column, rows)
322
+ self . vectorized_append_inner :: < GenericStringType < O > > ( column, rows) ?
314
323
}
315
324
_ => unreachable ! ( "View types should use `ArrowBytesViewMap`" ) ,
316
325
} ;
326
+
327
+ Ok ( ( ) )
317
328
}
318
329
319
330
fn len ( & self ) -> usize {
@@ -421,12 +432,12 @@ mod tests {
421
432
422
433
use crate :: aggregates:: group_values:: multi_group_by:: bytes:: ByteGroupValueBuilder ;
423
434
use arrow:: array:: { ArrayRef , NullBufferBuilder , StringArray } ;
435
+ use datafusion_common:: DataFusionError ;
424
436
use datafusion_physical_expr:: binary_map:: OutputType ;
425
437
426
438
use super :: GroupColumn ;
427
439
428
440
#[ test]
429
- #[ should_panic]
430
441
fn test_byte_group_value_builder_overflow ( ) {
431
442
let mut builder = ByteGroupValueBuilder :: < i32 > :: new ( OutputType :: Utf8 ) ;
432
443
@@ -435,31 +446,36 @@ mod tests {
435
446
let array =
436
447
Arc :: new ( StringArray :: from ( vec ! [ Some ( large_string. as_str( ) ) ] ) ) as ArrayRef ;
437
448
438
- // Append items until our buffer length is 1 + i32::MAX as usize
439
- for _ in 0 ..2048 {
440
- builder. append_val ( & array, 0 ) ;
449
+ // Append items until our buffer length is i32::MAX as usize
450
+ for _ in 0 ..2047 {
451
+ builder. append_val ( & array, 0 ) . unwrap ( ) ;
441
452
}
442
453
443
- assert_eq ! ( builder. value( 2047 ) , large_string. as_bytes( ) ) ;
454
+ assert ! ( matches!(
455
+ builder. append_val( & array, 0 ) ,
456
+ Err ( DataFusionError :: Execution ( e) ) if e. contains( "offset overflow" )
457
+ ) ) ;
458
+
459
+ assert_eq ! ( builder. value( 2046 ) , large_string. as_bytes( ) ) ;
444
460
}
445
461
446
462
#[ test]
447
463
fn test_byte_take_n ( ) {
448
464
let mut builder = ByteGroupValueBuilder :: < i32 > :: new ( OutputType :: Utf8 ) ;
449
465
let array = Arc :: new ( StringArray :: from ( vec ! [ Some ( "a" ) , None ] ) ) as ArrayRef ;
450
466
// a, null, null
451
- builder. append_val ( & array, 0 ) ;
452
- builder. append_val ( & array, 1 ) ;
453
- builder. append_val ( & array, 1 ) ;
467
+ builder. append_val ( & array, 0 ) . unwrap ( ) ;
468
+ builder. append_val ( & array, 1 ) . unwrap ( ) ;
469
+ builder. append_val ( & array, 1 ) . unwrap ( ) ;
454
470
455
471
// (a, null) remaining: null
456
472
let output = builder. take_n ( 2 ) ;
457
473
assert_eq ! ( & output, & array) ;
458
474
459
475
// null, a, null, a
460
- builder. append_val ( & array, 0 ) ;
461
- builder. append_val ( & array, 1 ) ;
462
- builder. append_val ( & array, 0 ) ;
476
+ builder. append_val ( & array, 0 ) . unwrap ( ) ;
477
+ builder. append_val ( & array, 1 ) . unwrap ( ) ;
478
+ builder. append_val ( & array, 0 ) . unwrap ( ) ;
463
479
464
480
// (null, a) remaining: (null, a)
465
481
let output = builder. take_n ( 2 ) ;
@@ -473,9 +489,9 @@ mod tests {
473
489
] ) ) as ArrayRef ;
474
490
475
491
// null, a, longstringfortest, null, null
476
- builder. append_val ( & array, 2 ) ;
477
- builder. append_val ( & array, 1 ) ;
478
- builder. append_val ( & array, 1 ) ;
492
+ builder. append_val ( & array, 2 ) . unwrap ( ) ;
493
+ builder. append_val ( & array, 1 ) . unwrap ( ) ;
494
+ builder. append_val ( & array, 1 ) . unwrap ( ) ;
479
495
480
496
// (null, a, longstringfortest, null) remaining: (null)
481
497
let output = builder. take_n ( 4 ) ;
@@ -494,7 +510,7 @@ mod tests {
494
510
builder_array : & ArrayRef ,
495
511
append_rows : & [ usize ] | {
496
512
for & index in append_rows {
497
- builder. append_val ( builder_array, index) ;
513
+ builder. append_val ( builder_array, index) . unwrap ( ) ;
498
514
}
499
515
} ;
500
516
@@ -517,7 +533,9 @@ mod tests {
517
533
let append = |builder : & mut ByteGroupValueBuilder < i32 > ,
518
534
builder_array : & ArrayRef ,
519
535
append_rows : & [ usize ] | {
520
- builder. vectorized_append ( builder_array, append_rows) ;
536
+ builder
537
+ . vectorized_append ( builder_array, append_rows)
538
+ . unwrap ( ) ;
521
539
} ;
522
540
523
541
let equal_to = |builder : & ByteGroupValueBuilder < i32 > ,
@@ -551,7 +569,9 @@ mod tests {
551
569
None ,
552
570
None ,
553
571
] ) ) as _ ;
554
- builder. vectorized_append ( & all_nulls_input_array, & [ 0 , 1 , 2 , 3 , 4 ] ) ;
572
+ builder
573
+ . vectorized_append ( & all_nulls_input_array, & [ 0 , 1 , 2 , 3 , 4 ] )
574
+ . unwrap ( ) ;
555
575
556
576
let mut equal_to_results = vec ! [ true ; all_nulls_input_array. len( ) ] ;
557
577
builder. vectorized_equal_to (
@@ -575,7 +595,9 @@ mod tests {
575
595
Some ( "string4" ) ,
576
596
Some ( "string5" ) ,
577
597
] ) ) as _ ;
578
- builder. vectorized_append ( & all_not_nulls_input_array, & [ 0 , 1 , 2 , 3 , 4 ] ) ;
598
+ builder
599
+ . vectorized_append ( & all_not_nulls_input_array, & [ 0 , 1 , 2 , 3 , 4 ] )
600
+ . unwrap ( ) ;
579
601
580
602
let mut equal_to_results = vec ! [ true ; all_not_nulls_input_array. len( ) ] ;
581
603
builder. vectorized_equal_to (
0 commit comments