@@ -130,7 +130,7 @@ def preference_collate_fn(batch, max_seq_len=None, pad_token_id=0):
130
130
difference = max_seq_len - len (sequence ["input_ids" ])
131
131
132
132
input_dict ["input_ids" ].append (sequence ["input_ids" ] + [pad_token_id ] * difference )
133
- input_dict ["position_ids" ].append (sequence ["position_ids" ] + [pad_token_id ] * difference )
133
+ input_dict ["position_ids" ].append (sequence ["position_ids" ] + [0 ] * difference )
134
134
if use_attn_mask_startend_row_indices :
135
135
input_dict ["attn_mask_startend_row_indices" ].append (
136
136
[
@@ -281,7 +281,7 @@ def zero_padding_process_collate_fn(batch, max_seq_len=None, pad_token_id=0):
281
281
difference = max_seq_len - len (sequence ["input_ids" ])
282
282
283
283
input_dict ["input_ids" ].append (sequence ["input_ids" ] + [pad_token_id ] * difference )
284
- input_dict ["position_ids" ].append (sequence ["position_ids" ] + [pad_token_id ] * difference )
284
+ input_dict ["position_ids" ].append (sequence ["position_ids" ] + [0 ] * difference )
285
285
input_dict ["labels" ].append (sequence ["labels" ] + [- 100 ] * difference )
286
286
if use_attn_mask_startend_row_indices :
287
287
input_dict ["attn_mask_startend_row_indices" ].append (
@@ -334,7 +334,7 @@ def process_collate_fn(batch, pad_token_id=0):
334
334
335
335
# input_ids: Tensor(seqL, ); position_ids: list, len(seqL); labels: Tensor(seqL, )
336
336
input_dict ["input_ids" ].append (sequence ["input_ids" ].tolist () + [pad_token_id ] * difference )
337
- input_dict ["position_ids" ].append (sequence ["position_ids" ] + [pad_token_id ] * difference )
337
+ input_dict ["position_ids" ].append (sequence ["position_ids" ] + [0 ] * difference )
338
338
input_dict ["labels" ].append (sequence ["labels" ].tolist () + [- 100 ] * difference )
339
339
if use_attn_mask_startend_row_indices :
340
340
input_dict ["attn_mask_startend_row_indices" ].append (
0 commit comments