1717
1818import { TextEncoder } from 'text-encoding-utf-8' ;
1919import Arrow from '../Arrow' ;
20- import { type , TypedArray , TypedArrayConstructor } from '../../src/Arrow' ;
20+ import { type , TypedArray , TypedArrayConstructor , Vector } from '../../src/Arrow' ;
21+ import { packBools } from '../../src/util/bit'
2122
2223const utf8Encoder = new TextEncoder ( 'utf-8' ) ;
2324
24- const { BoolData, FlatData, FlatListData } = Arrow . data ;
25- const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow . vector ;
25+ const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow . data ;
26+ const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow . vector ;
2627const {
27- Utf8, Bool,
28+ Dictionary , Utf8, Bool,
2829 Float16, Float32, Float64,
2930 Int8, Int16, Int32, Int64,
3031 Uint8, Uint16, Uint32, Uint64,
@@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
310311 let offset = 0 ;
311312 const offsets = Uint32Array . of ( 0 , ...values . map ( ( d ) => { offset += d . length ; return offset ; } ) ) ;
312313 const vector = new Utf8Vector ( new FlatListData ( new Utf8 ( ) , n , null , offsets , utf8Encoder . encode ( values . join ( '' ) ) ) ) ;
314+ basicVectorTests ( vector , values , [ 'abc' , '123' ] ) ;
315+ describe ( `sliced` , ( ) => {
316+ basicVectorTests ( vector . slice ( 1 , 3 ) , values . slice ( 1 , 3 ) , [ 'foo' , 'abc' ] ) ;
317+ } ) ;
318+ } ) ;
319+
// Test suite for DictionaryVector: builds a Utf8 dictionary and a set of random
// Int32 indices into it, then runs the shared basic vector tests against the
// resulting vector (and a slice of it), once with an index that has no nulls
// and once with an index whose validity bitmap marks some slots as null.
describe(`DictionaryVector`, () => {
    const dictionary = ['foo', 'bar', 'baz'];
    const extras = ['abc', '123']; // values to search for that should NOT be found
    let offset = 0;
    // Offsets of each dictionary entry within the joined string: a leading 0
    // followed by the cumulative string lengths.
    const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset += d.length; return offset; }));
    const dictionary_vec = new Utf8Vector(new FlatListData(new Utf8(), dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));

    // 50 random integer indices in [0, 3), i.e. valid positions in `dictionary`.
    const indices = Array.from({ length: 50 }, () => Math.random() * 3 | 0);

    describe(`index with nullCount == 0`, () => {
        const indices_data = new FlatData(new Int32(), indices.length, new Uint8Array(0), indices);

        // Expected decoded values: each index resolved through the dictionary.
        const values = Array.from(indices).map((d) => dictionary[d]);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, indices_data));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });

    describe(`index with nullCount > 0`, () => {
        // Randomly mark roughly 20% of the slots as null (validity === false).
        const validity = Array.from({ length: indices.length }, () => Math.random() > 0.2);
        // Number of slots flagged invalid; passed to FlatData as the null count.
        const nullCount = validity.reduce((acc, d) => acc + (d ? 0 : 1), 0);
        const indices_data = new FlatData(new Int32(), indices.length, packBools(validity), indices, 0, nullCount);
        // Expected decoded values: null wherever the slot is invalid.
        const values = Array.from(indices).map((d, i) => validity[i] ? dictionary[d] : null);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, indices_data));

        // Use the shared `extras` list here as well, matching the other calls.
        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });
});
354+
355+ // Creates some basic tests for the given vector.
356+ // Verifies that:
357+ // - `get` and the native iterator return the same data as `values`
358+ // - `indexOf` returns the same indices as `values`
359+ function basicVectorTests ( vector : Vector , values : any [ ] , extras : any [ ] ) {
360+ const n = values . length ;
361+
313362 test ( `gets expected values` , ( ) => {
314363 let i = - 1 ;
315364 while ( ++ i < n ) {
@@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
325374 }
326375 } ) ;
327376 test ( `indexOf returns expected values` , ( ) => {
328- let testValues = values . concat ( [ 'abc' , '12345' ] ) ;
377+ let testValues = values . concat ( extras ) ;
329378
330379 for ( const value of testValues ) {
331380 const expected = values . indexOf ( value ) ;
332381 expect ( vector . indexOf ( value ) ) . toEqual ( expected ) ;
333382 }
334383 } ) ;
335- } ) ;
384+ }
336385
337386function toMap < T > ( entries : Record < string , T > , keys : string [ ] ) {
338387 return keys . reduce ( ( map , key ) => {
0 commit comments