@@ -54,7 +54,6 @@ describe("Tokenizer padding/truncation", () => {
   }, MAX_TOKENIZER_LOAD_TIME);
 
   describe("return_tensor=false (jagged array)", () => {
-
     test("jagged array output when return_tensor is false", () => {
       const output = tokenizer(inputs, {
         return_tensor: false,
@@ -105,7 +104,6 @@ describe("Tokenizer padding/truncation", () => {
       compare(output, expected);
     });
 
-
     test("No padding, max_length=3 (implicit truncation strategy)", () => {
       const output = tokenizer(inputs_2, {
         padding: false,
@@ -129,9 +127,18 @@ describe("Tokenizer padding/truncation", () => {
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0, 0, 0], [1038, 1039, 1040, 1041, 1042]],
-        token_type_ids: [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]],
-        attention_mask: [[1, 0, 0, 0, 0], [1, 1, 1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0, 0, 0],
+          [1038, 1039, 1040, 1041, 1042],
+        ],
+        token_type_ids: [
+          [0, 0, 0, 0, 0],
+          [0, 0, 0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0, 0, 0],
+          [1, 1, 1, 1, 1],
+        ],
       };
       compare(output, expected);
     });
@@ -161,48 +168,75 @@ describe("Tokenizer padding/truncation", () => {
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0], [1038, 1039, 1040]],
-        token_type_ids: [[0, 0, 0], [0, 0, 0]],
-        attention_mask: [[1, 0, 0], [1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0],
+          [1038, 1039, 1040],
+        ],
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0],
+          [1, 1, 1],
+        ],
       };
       compare(output, expected);
     });
 
     test("Padding 'max_length' without truncation, max_length=3", () => {
       const output = tokenizer(inputs_2, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: false,
         max_length: 3,
         add_special_tokens: false,
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0], [1038, 1039, 1040, 1041, 1042]],
-        token_type_ids: [[0, 0, 0], [0, 0, 0, 0, 0]],
-        attention_mask: [[1, 0, 0], [1, 1, 1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0],
+          [1038, 1039, 1040, 1041, 1042],
+        ],
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0],
+          [1, 1, 1, 1, 1],
+        ],
       };
       compare(output, expected);
     });
 
     test("Padding 'max_length' with truncation, max_length=3", () => {
       const output = tokenizer(inputs_2, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: true,
         max_length: 3,
         add_special_tokens: false,
         return_tensor: false,
       });
       const expected = {
-        input_ids: [[1037, 0, 0], [1038, 1039, 1040]],
-        token_type_ids: [[0, 0, 0], [0, 0, 0]],
-        attention_mask: [[1, 0, 0], [1, 1, 1]],
+        input_ids: [
+          [1037, 0, 0],
+          [1038, 1039, 1040],
+        ],
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0],
+        ],
+        attention_mask: [
+          [1, 0, 0],
+          [1, 1, 1],
+        ],
       };
       compare(output, expected);
     });
 
     test("Padding 'max_length' without truncation and max_length=null", () => {
       const output = tokenizer(inputs_2, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: false,
         max_length: null,
         add_special_tokens: false,
@@ -211,23 +245,22 @@ describe("Tokenizer padding/truncation", () => {
       const expected = {
         input_ids: [
           [1037, ...Array(511).fill(0)],
-          [1038, 1039, 1040, 1041, 1042, ...Array(507).fill(0)]
+          [1038, 1039, 1040, 1041, 1042, ...Array(507).fill(0)],
         ],
         token_type_ids: [
           [0, ...Array(511).fill(0)],
-          [0, 0, 0, 0, 0, ...Array(507).fill(0)]
+          [0, 0, 0, 0, 0, ...Array(507).fill(0)],
         ],
         attention_mask: [
           [1, ...Array(511).fill(0)],
-          [1, 1, 1, 1, 1, ...Array(507).fill(0)]
+          [1, 1, 1, 1, 1, ...Array(507).fill(0)],
         ],
       };
       compare(output, expected);
     });
   });
 
   describe("return_tensor=true", () => {
-
     test("throws error when tensor output is requested for a jagged array", () => {
       expect(() => tokenizer(inputs)).toThrow("Unable to create tensor");
     });
@@ -329,7 +362,7 @@ describe("Tokenizer padding/truncation", () => {
 
     test("padding:'max_length' pads to the specified max_length", () => {
       const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
-        padding: 'max_length',
+        padding: "max_length",
         truncation: true,
         add_special_tokens: false,
         max_length: 3,
@@ -347,7 +380,7 @@ describe("Tokenizer padding/truncation", () => {
         [0n, 0n, 0n],
       ]);
     });
-  })
+  });
 });
 
 describe("Token type ids", () => {