Adding support for the SophiaG optimizer in the mlm trainer, adding new config
dchaplinsky committed Jul 6, 2023
1 parent 907ad8b commit 1e17678
Showing 2 changed files with 69 additions and 11 deletions.
@@ -0,0 +1,30 @@
{
"output_dir": "exps/roberta.oscar.nofilter.wechsel.largedict.sophia",
"model_type": "roberta",
"config_name": "exps/roberta.oscar.nofilter.wechsel.largedict.sophia",
"tokenizer_name": "exps/roberta.oscar.nofilter.wechsel.largedict.sophia",
"train_file": "data/oscar.nofilter",
"max_seq_length": 512,
"do_train": true,
"do_eval": true,
"validation_file": "data/bruk_valid_data.txt",
"weight_decay": 1e-1,
"per_device_train_batch_size": 24,
"per_device_eval_batch_size": 24,
"gradient_accumulation_steps": 4,
"learning_rate": 2e-4,
"evaluation_strategy": "steps",
"warmup_steps": 25000,
"max_steps": 250000,
"eval_steps": 500,
"save_steps": 12500,
"logging_steps": 500,
"overwrite_output_dir": true,
"num_train_epochs": 0,
"adam_beta1": 0.965,
"adam_beta2": 0.99,
"adam_epsilon": 1e-6,
"preprocessing_num_workers": 48,
"fp16": true,
"model_name_or_path": "exps/roberta.oscar.nofilter.wechsel.largedict.sophia"
}
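
For context on how this file is consumed: run_mlm.py follows the usual Hugging Face pattern where a single .json argument is handed to HfArgumentParser, which maps each key above onto a field of TrainingArguments or of the script's own ModelArguments/DataTrainingArguments dataclasses. A minimal sketch of that flow, using a trimmed, hypothetical stand-in for DataTrainingArguments that carries only the optimizer field introduced below in run_mlm.py:

    import os
    import sys
    from dataclasses import dataclass, field
    from typing import Optional

    from transformers import HfArgumentParser, TrainingArguments


    @dataclass
    class DataTrainingArguments:
        # Trimmed stand-in: the real dataclass in run_mlm.py defines one field
        # per remaining key in the JSON config above; only the field added by
        # this commit is reproduced here.
        optimizer: Optional[str] = field(
            default="adam",
            metadata={"help": "The optimizer to use for training."},
        )


    def parse_config():
        parser = HfArgumentParser((DataTrainingArguments, TrainingArguments))
        if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
            # A single .json argument is treated as a config file, e.g.
            #   python run_mlm.py path/to/config.json
            return parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
        return parser.parse_args_into_dataclasses()

Note that the config above does not itself contain an "optimizer" key, so as written it falls through to the default "adam" branch in run_mlm.py; selecting SophiaG presumably requires adding "optimizer": "sophia" to the JSON.
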
50 changes: 39 additions & 11 deletions run_mlm.py
@@ -180,6 +180,11 @@ class DataTrainingArguments:
         metadata={"help": "The name of the project to which the training run will belong on Weights & Biases."}
     )
 
+    optimizer: Optional[str] = field(
+        default="adam",
+        metadata={"help": "The optimizer to use for training."}
+    )
+
 
 class DataCollatorForLanguageModeling(transformers.DataCollatorForLanguageModeling):
     def torch_mask_tokens(self, inputs, special_tokens_mask = None):
@@ -503,17 +508,40 @@ def compute_metrics(eval_preds):
         wandb.config.update(data_args)
         wandb.save(__file__, policy="now")
 
-    # Initialize our Trainer
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=train_dataset if training_args.do_train else None,
-        eval_dataset=eval_dataset if training_args.do_eval else None,
-        tokenizer=tokenizer,
-        data_collator=data_collator,
-        compute_metrics=compute_metrics if training_args.do_eval else None,
-        preprocess_logits_for_metrics=preprocess_logits_for_metrics if training_args.do_eval else None,
-    )
+    if data_args.optimizer == "adam":
+        # Initialize our Trainer
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset if training_args.do_train else None,
+            eval_dataset=eval_dataset if training_args.do_eval else None,
+            tokenizer=tokenizer,
+            data_collator=data_collator,
+            compute_metrics=compute_metrics if training_args.do_eval else None,
+            preprocess_logits_for_metrics=preprocess_logits_for_metrics if training_args.do_eval else None,
+        )
+    elif data_args.optimizer == "sophia":
+        from optimizers.sophia import SophiaG
+        optimizer = SophiaG(
+            filter(lambda p: p.requires_grad, model.parameters()),
+            lr=training_args.learning_rate,
+            weight_decay=training_args.weight_decay,
+            betas=(training_args.adam_beta1, training_args.adam_beta2),
+        )
+
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset if training_args.do_train else None,
+            eval_dataset=eval_dataset if training_args.do_eval else None,
+            tokenizer=tokenizer,
+            data_collator=data_collator,
+            compute_metrics=compute_metrics if training_args.do_eval else None,
+            preprocess_logits_for_metrics=preprocess_logits_for_metrics if training_args.do_eval else None,
+            optimizers=(optimizer, None),
+        )
+    else:
+        raise ValueError("Optimizer not supported")
 
     # Training
     if training_args.do_train:
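
Passing optimizers=(optimizer, None) hands the Trainer a ready-built SophiaG instance while leaving the scheduler slot empty, so the Trainer still constructs its usual learning-rate schedule from warmup_steps and max_steps; the adam_beta1/adam_beta2 values from the config (0.965, 0.99) are rerouted into SophiaG's betas, and weight_decay is passed straight through. If the schedule should be made explicit as well, both elements of the tuple can be supplied. A hedged sketch, reusing model, the datasets and training_args from the script above and assuming the Trainer's default linear warmup/decay schedule:

    from transformers import Trainer, get_linear_schedule_with_warmup

    # Assumption: `optimizer` is the SophiaG instance built in the "sophia"
    # branch above. The schedule mirrors what the Trainer would create on its
    # own for the default "linear" scheduler: 25000 warmup steps from the
    # config, then linear decay until max_steps=250000.
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=training_args.warmup_steps,
        num_training_steps=training_args.max_steps,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset if training_args.do_train else None,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        tokenizer=tokenizer,
        data_collator=data_collator,
        optimizers=(optimizer, lr_scheduler),  # both slots supplied explicitly
    )

Either way, because the first element of the tuple is set, the Trainer skips its own optimizer creation entirely.
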
