src/transformers/trainer.py (3 changes: 2 additions & 1 deletion)
@@ -2349,7 +2349,8 @@ def _inner_training_loop(
         if self.is_fsdp_enabled:
             self.model = self.model_wrapped = model
             # Fix `got mixed torch.Tensor and DTensor` error in model.generate() for FSDP2 with LoRA
-            dist.fsdp.register_fsdp_forward_method(self.model, "generate")
+            if hasattr(self.model, "generate"):
+                dist.fsdp.register_fsdp_forward_method(self.model, "generate")

         # for the rest of this function `model` is the outside model, whether it was wrapped or not
         if model is not self.model:
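For context, FSDP2 only installs its all-gather/reshard hooks around `forward` by default; `register_fsdp_forward_method` extends that to other entry points such as `generate`. Below is a minimal sketch of the guarded call, assuming a PyTorch build where the helper lives under `torch.distributed.fsdp` (the patched line itself relies on this); the wrapper function and its name are illustrative, not the Trainer's actual code path:

```python
import torch.distributed.fsdp as dist_fsdp


def register_generate_if_present(model):
    """Guarded registration, mirroring the patched Trainer line: only models
    that actually expose `generate` (causal or seq2seq LMs) are registered."""
    if hasattr(model, "generate"):
        # Without the registration, FSDP2 keeps parameters as sharded DTensors
        # while `generate` runs, which can surface as
        # "got mixed torch.Tensor and DTensor".
        dist_fsdp.register_fsdp_forward_method(model, "generate")
```

Models without a `generate` method (plain encoders used for classification, custom models) would make the unconditional call fail, which is what the added `hasattr` guard avoids.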
tests/deepspeed/test_deepspeed.py (6 changes: 4 additions & 2 deletions)
@@ -280,6 +280,8 @@ def test_init_zero3_missing_params(self):
         from transformers.models.gpt2.modeling_gpt2 import GPT2PreTrainedModel

         class TinyGPT2WithUninitializedWeights(GPT2PreTrainedModel):
+            _tied_weights_keys = {"lm_head.weight": "transformer.wte.weight"}
+
             def __init__(self, config):
                 super().__init__(config)
                 self.transformer = AutoModel.from_pretrained(GPT2_TINY, config=config)
@@ -1064,10 +1066,10 @@ def _add_eos_to_examples(example):
             return example

         def _convert_to_features(example_batch):
-            input_encodings = tokenizer.batch_encode_plus(
+            input_encodings = tokenizer(
                 example_batch["input_text"], padding="max_length", max_length=512, truncation=True
             )
-            target_encodings = tokenizer.batch_encode_plus(
+            target_encodings = tokenizer(
                 example_batch["target_text"], padding="max_length", max_length=16, truncation=True
             )

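Two cleanups land in this file: the tiny test model now declares the tie between `lm_head.weight` and `transformer.wte.weight` through `_tied_weights_keys`, and the older `batch_encode_plus` spelling is replaced with the tokenizer's `__call__` API. For the usage in this test the two are interchangeable; a quick sketch (the checkpoint name is only an example):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 tokenizers have no pad token by default

batch = ["translate English to German: hello", "translate English to German: goodbye"]

# The legacy batch_encode_plus(...) and the __call__ API accept the same
# padding/truncation kwargs and return equivalent BatchEncoding objects.
old = tokenizer.batch_encode_plus(batch, padding="max_length", max_length=16, truncation=True)
new = tokenizer(batch, padding="max_length", max_length=16, truncation=True)
assert old["input_ids"] == new["input_ids"]
```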
tests/trainer/test_trainer.py (15 changes: 4 additions & 11 deletions)
@@ -1541,9 +1541,7 @@ def test_multiple_peft_adapters(self):
         tiny_model = get_peft_model(tiny_model, peft_config, "adapter1")
         tiny_model.add_adapter("adapter2", peft_config)

-        max_len_single_sentence = self.model_max_length - self.num_special_tokens_to_add(pair=False)
-
-        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)

         tokenizer.pad_token = tokenizer.eos_token

@@ -3733,9 +3731,7 @@ def test_trainer_eval_multiple(self):
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
         model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

-        max_len_single_sentence = self.model_max_length - self.num_special_tokens_to_add(pair=False)
-
-        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = TrainingArguments(
                 output_dir=tmp_dir,
@@ -3759,8 +3755,7 @@ def test_trainer_eval_lm(self):
     def test_trainer_eval_lm(self):
         MODEL_ID = "distilbert/distilroberta-base"
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-        max_len_single_sentence = self.model_max_length - self.num_special_tokens_to_add(pair=False)
-        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)
         self.assertEqual(len(dataset), 31)

     def test_training_iterable_dataset(self):
@@ -4942,10 +4937,8 @@ def test_trainer_works_without_model_config(self):

         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
         model = BasicTextGenerationModel(vocab_size=tokenizer.vocab_size, hidden_size=32)
         # Note that this class does not have a config attribute
-        max_len_single_sentence = tokenizer.model_max_length - tokenizer.num_special_tokens_to_add(pair=False)
-
-        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, max_len_single_sentence)
+        train_dataset = get_dataset(PATH_SAMPLE_TEXT, tokenizer, 100)

         with tempfile.TemporaryDirectory() as tmpdir:
             training_args = TrainingArguments(
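All four hunks in this file make the same substitution: instead of deriving a length from `model_max_length` and `num_special_tokens_to_add`, the tests pass a fixed value of 100 to the `get_dataset` helper. `test_trainer_eval_lm` continues to assert 31 examples, which suggests the argument caps the per-example sequence length rather than determining how many examples are produced. As a rough, hypothetical sketch of such a helper (the class name and behavior are illustrative; the real `get_dataset` in the test suite may differ):

```python
import torch
from torch.utils.data import Dataset


class TruncatedLineDataset(Dataset):
    """Hypothetical stand-in for the tests' get_dataset helper:
    one example per non-empty line, truncated to max_length tokens."""

    def __init__(self, file_path, tokenizer, max_length=100):
        with open(file_path, encoding="utf-8") as f:
            lines = [line for line in f.read().splitlines() if line.strip()]
        # Truncation bounds each example's length; it does not change the count.
        self.encodings = tokenizer(lines, truncation=True, max_length=max_length)

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        ids = torch.tensor(self.encodings["input_ids"][idx])
        return {"input_ids": ids, "labels": ids.clone()}
```

Usage in the spirit of the tests would be `dataset = TruncatedLineDataset(PATH_SAMPLE_TEXT, tokenizer, max_length=100)`.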