src/transformers/trainer.py (3 changes: 2 additions & 1 deletion)
@@ -2349,7 +2349,8 @@ def _inner_training_loop(
         if self.is_fsdp_enabled:
             self.model = self.model_wrapped = model
             # Fix `got mixed torch.Tensor and DTensor` error in model.generate() for FSDP2 with LoRA
-            dist.fsdp.register_fsdp_forward_method(self.model, "generate")
+            if hasattr(self.model, "generate"):
+                dist.fsdp.register_fsdp_forward_method(self.model, "generate")

         # for the rest of this function `model` is the outside model, whether it was wrapped or not
         if model is not self.model:
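For context, FSDP2 only installs its all-gather/reshard hooks around `forward` by default; `register_fsdp_forward_method` extends that to other entry points such as `generate`. Below is a minimal sketch of the guarded call, assuming a PyTorch build where the helper lives under `torch.distributed.fsdp` (the patched line itself relies on this); the wrapper function and its name are illustrative, not the Trainer's actual code path:

```python
import torch.distributed.fsdp as dist_fsdp


def register_generate_if_present(model):
    """Guarded registration, mirroring the patched Trainer line: only models
    that actually expose `generate` (causal or seq2seq LMs) are registered."""
    if hasattr(model, "generate"):
        # Without the registration, FSDP2 keeps parameters as sharded DTensors
        # while `generate` runs, which can surface as
        # "got mixed torch.Tensor and DTensor".
        dist_fsdp.register_fsdp_forward_method(model, "generate")
```

Models without a `generate` method (plain encoders used for classification, custom models) would make the unconditional call fail, which is what the added `hasattr` guard avoids.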
tests/deepspeed/test_deepspeed.py (6 changes: 4 additions & 2 deletions)
@@ -280,6 +280,8 @@ def test_init_zero3_missing_params(self):
         from transformers.models.gpt2.modeling_gpt2 import GPT2PreTrainedModel

         class TinyGPT2WithUninitializedWeights(GPT2PreTrainedModel):
+            _tied_weights_keys = {"lm_head.weight": "transformer.wte.weight"}
+
             def __init__(self, config):
                 super().__init__(config)
                 self.transformer = AutoModel.from_pretrained(GPT2_TINY, config=config)
@@ -1064,10 +1066,10 @@ def _add_eos_to_examples(example):
             return example

         def _convert_to_features(example_batch):
-            input_encodings = tokenizer.batch_encode_plus(
+            input_encodings = tokenizer(
                 example_batch["input_text"], padding="max_length", max_length=512, truncation=True
             )
-            target_encodings = tokenizer.batch_encode_plus(
+            target_encodings = tokenizer(
                 example_batch["target_text"], padding="max_length", max_length=16, truncation=True
             )

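Two cleanups land in this file: the tiny test model now declares the tie between `lm_head.weight` and `transformer.wte.weight` through `_tied_weights_keys`, and the older `batch_encode_plus` spelling is replaced with the tokenizer's `__call__` API. For the usage in this test the two are interchangeable; a quick sketch (the checkpoint name is only an example):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 tokenizers have no pad token by default

batch = ["translate English to German: hello", "translate English to German: goodbye"]

# The legacy batch_encode_plus(...) and the __call__ API accept the same
# padding/truncation kwargs and return equivalent BatchEncoding objects.
old = tokenizer.batch_encode_plus(batch, padding="max_length", max_length=16, truncation=True)
new = tokenizer(batch, padding="max_length", max_length=16, truncation=True)
assert old["input_ids"] == new["input_ids"]
```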
tests/trainer/test_trainer.py (15 changes: 4 additions & 11 deletions)
@@ -1541,9 +1541,7 @@ def test_multiple_peft_adapters(self):
         tiny_model = get_peft_model(tiny_model, peft_config, "adapter1")
         tiny_model.add_adapter("adapter2", peft_config)

-        max_len_single_sentence = self.model_max_length - self.num_special_tokens_to_add(pair=False)
-
-        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)

         tokenizer.pad_token = tokenizer.eos_token

@@ -3733,9 +3731,7 @@ def test_trainer_eval_multiple(self):
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
         model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

-        max_len_single_sentence = self.model_max_length - self.num_special_tokens_to_add(pair=False)
-
-        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = TrainingArguments(
                 output_dir=tmp_dir,
@@ -3759,8 +3755,7 @@ def test_trainer_eval_lm(self):
     def test_trainer_eval_lm(self):
         MODEL_ID = "distilbert/distilroberta-base"
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-        max_len_single_sentence = self.model_max_length - self.num_special_tokens_to_add(pair=False)
-        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)
         self.assertEqual(len(dataset), 31)

     def test_training_iterable_dataset(self):
@@ -4942,10 +4937,8 @@ def test_trainer_works_without_model_config(self):

         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
         model = BasicTextGenerationModel(vocab_size=tokenizer.vocab_size, hidden_size=32)
         # Note that this class does not have a config attribute
-        max_len_single_sentence = tokenizer.model_max_length - tokenizer.num_special_tokens_to_add(pair=False)
-
-        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)

         with tempfile.TemporaryDirectory() as tmpdir:
             training_args = TrainingArguments(
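All four hunks in this file make the same substitution: instead of deriving a length from `model_max_length` and `num_special_tokens_to_add`, the tests pass a fixed value of 100 to the `get_dataset` helper. `test_trainer_eval_lm` continues to assert 31 examples, which suggests the argument caps the per-example sequence length rather than determining how many examples are produced. As a rough, hypothetical sketch of such a helper (the class name and behavior are illustrative; the real `get_dataset` in the test suite may differ):

```python
import torch
from torch.utils.data import Dataset


class TruncatedLineDataset(Dataset):
    """Hypothetical stand-in for the tests' get_dataset helper:
    one example per non-empty line, truncated to max_length tokens."""

    def __init__(self, file_path, tokenizer, max_length=100):
        with open(file_path, encoding="utf-8") as f:
            lines = [line for line in f.read().splitlines() if line.strip()]
        # Truncation bounds each example's length; it does not change the count.
        self.encodings = tokenizer(lines, truncation=True, max_length=max_length)

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        ids = torch.tensor(self.encodings["input_ids"][idx])
        return {"input_ids": ids, "labels": ids.clone()}
```

Usage in the spirit of the tests would be `dataset = TruncatedLineDataset(PATH_SAMPLE_TEXT, tokenizer, max_length=100)`.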