15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- use crate :: { buffer:: MutableBuffer , datatypes:: DataType , util:: bit_util} ;
18
+ use crate :: {
19
+ buffer:: MutableBuffer ,
20
+ datatypes:: DataType ,
21
+ error:: { ArrowError , Result } ,
22
+ util:: bit_util,
23
+ } ;
19
24
20
25
use super :: {
21
26
data:: { into_buffers, new_buffers} ,
@@ -166,6 +171,65 @@ impl<'a> std::fmt::Debug for MutableArrayData<'a> {
166
171
}
167
172
}
168
173
174
+ /// Builds an extend that adds `offset` to the source primitive
175
+ /// Additionally validates that `max` fits into the
176
+ /// the underlying primitive returning None if not
177
+ fn build_extend_dictionary (
178
+ array : & ArrayData ,
179
+ offset : usize ,
180
+ max : usize ,
181
+ ) -> Option < Extend > {
182
+ use crate :: datatypes:: * ;
183
+ use std:: convert:: TryInto ;
184
+
185
+ match array. data_type ( ) {
186
+ DataType :: Dictionary ( child_data_type, _) => match child_data_type. as_ref ( ) {
187
+ DataType :: UInt8 => {
188
+ let _: u8 = max. try_into ( ) . ok ( ) ?;
189
+ let offset: u8 = offset. try_into ( ) . ok ( ) ?;
190
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
191
+ }
192
+ DataType :: UInt16 => {
193
+ let _: u16 = max. try_into ( ) . ok ( ) ?;
194
+ let offset: u16 = offset. try_into ( ) . ok ( ) ?;
195
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
196
+ }
197
+ DataType :: UInt32 => {
198
+ let _: u32 = max. try_into ( ) . ok ( ) ?;
199
+ let offset: u32 = offset. try_into ( ) . ok ( ) ?;
200
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
201
+ }
202
+ DataType :: UInt64 => {
203
+ let _: u64 = max. try_into ( ) . ok ( ) ?;
204
+ let offset: u64 = offset. try_into ( ) . ok ( ) ?;
205
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
206
+ }
207
+ DataType :: Int8 => {
208
+ let _: i8 = max. try_into ( ) . ok ( ) ?;
209
+ let offset: i8 = offset. try_into ( ) . ok ( ) ?;
210
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
211
+ }
212
+ DataType :: Int16 => {
213
+ let _: i16 = max. try_into ( ) . ok ( ) ?;
214
+ let offset: i16 = offset. try_into ( ) . ok ( ) ?;
215
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
216
+ }
217
+ DataType :: Int32 => {
218
+ let _: i32 = max. try_into ( ) . ok ( ) ?;
219
+ let offset: i32 = offset. try_into ( ) . ok ( ) ?;
220
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
221
+ }
222
+ DataType :: Int64 => {
223
+ let _: i64 = max. try_into ( ) . ok ( ) ?;
224
+ let offset: i64 = offset. try_into ( ) . ok ( ) ?;
225
+ Some ( primitive:: build_extend_with_offset ( array, offset) )
226
+ }
227
+ _ => unreachable ! ( ) ,
228
+ } ,
229
+ _ => None ,
230
+ }
231
+ }
232
+
169
233
fn build_extend ( array : & ArrayData ) -> Extend {
170
234
use crate :: datatypes:: * ;
171
235
match array. data_type ( ) {
@@ -199,17 +263,7 @@ fn build_extend(array: &ArrayData) -> Extend {
199
263
}
200
264
DataType :: List ( _) => list:: build_extend :: < i32 > ( array) ,
201
265
DataType :: LargeList ( _) => list:: build_extend :: < i64 > ( array) ,
202
- DataType :: Dictionary ( child_data_type, _) => match child_data_type. as_ref ( ) {
203
- DataType :: UInt8 => primitive:: build_extend :: < u8 > ( array) ,
204
- DataType :: UInt16 => primitive:: build_extend :: < u16 > ( array) ,
205
- DataType :: UInt32 => primitive:: build_extend :: < u32 > ( array) ,
206
- DataType :: UInt64 => primitive:: build_extend :: < u64 > ( array) ,
207
- DataType :: Int8 => primitive:: build_extend :: < i8 > ( array) ,
208
- DataType :: Int16 => primitive:: build_extend :: < i16 > ( array) ,
209
- DataType :: Int32 => primitive:: build_extend :: < i32 > ( array) ,
210
- DataType :: Int64 => primitive:: build_extend :: < i64 > ( array) ,
211
- _ => unreachable ! ( ) ,
212
- } ,
266
+ DataType :: Dictionary ( _, _) => unreachable ! ( "should use build_extend_dictionary" ) ,
213
267
DataType :: Struct ( _) => structure:: build_extend ( array) ,
214
268
DataType :: FixedSizeBinary ( _) => fixed_binary:: build_extend ( array) ,
215
269
DataType :: Float16 => unreachable ! ( ) ,
@@ -339,7 +393,29 @@ impl<'a> MutableArrayData<'a> {
339
393
} ;
340
394
341
395
let dictionary = match & data_type {
342
- DataType :: Dictionary ( _, _) => Some ( arrays[ 0 ] . child_data ( ) [ 0 ] . clone ( ) ) ,
396
+ DataType :: Dictionary ( _, _) => match arrays. len ( ) {
397
+ 0 => unreachable ! ( ) ,
398
+ 1 => Some ( arrays[ 0 ] . child_data ( ) [ 0 ] . clone ( ) ) ,
399
+ _ => {
400
+ // Concat dictionaries together
401
+ let dictionaries: Vec < _ > =
402
+ arrays. iter ( ) . map ( |array| & array. child_data ( ) [ 0 ] ) . collect ( ) ;
403
+ let lengths: Vec < _ > = dictionaries
404
+ . iter ( )
405
+ . map ( |dictionary| dictionary. len ( ) )
406
+ . collect ( ) ;
407
+ let capacity = lengths. iter ( ) . sum ( ) ;
408
+
409
+ let mut mutable =
410
+ MutableArrayData :: new ( dictionaries, false , capacity) ;
411
+
412
+ for ( i, len) in lengths. iter ( ) . enumerate ( ) {
413
+ mutable. extend ( i, 0 , * len)
414
+ }
415
+
416
+ Some ( mutable. freeze ( ) )
417
+ }
418
+ } ,
343
419
_ => None ,
344
420
} ;
345
421
@@ -353,7 +429,23 @@ impl<'a> MutableArrayData<'a> {
353
429
let null_bytes = bit_util:: ceil ( capacity, 8 ) ;
354
430
let null_buffer = MutableBuffer :: from_len_zeroed ( null_bytes) ;
355
431
356
- let extend_values = arrays. iter ( ) . map ( |array| build_extend ( array) ) . collect ( ) ;
432
+ let extend_values = match & data_type {
433
+ DataType :: Dictionary ( _, _) => {
434
+ let mut next_offset = 0 ;
435
+ let extend_values: Result < Vec < _ > > = arrays
436
+ . iter ( )
437
+ . map ( |array| {
438
+ let offset = next_offset;
439
+ next_offset += array. child_data ( ) [ 0 ] . len ( ) ;
440
+ build_extend_dictionary ( array, offset, next_offset)
441
+ . ok_or ( ArrowError :: DictionaryKeyOverflowError )
442
+ } )
443
+ . collect ( ) ;
444
+
445
+ extend_values. expect ( "MutableArrayData::new is infallible" )
446
+ }
447
+ _ => arrays. iter ( ) . map ( |array| build_extend ( array) ) . collect ( ) ,
448
+ } ;
357
449
358
450
let data = _MutableArrayData {
359
451
data_type : data_type. clone ( ) ,
0 commit comments