Fine-Tuning Scheduler Tutorial Update for Lightning/PyTorch 2.4.0 (#351)

Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
speediedan and Borda authored Jul 26, 2024
1 parent a01f01f commit 36f10f1
Showing 2 changed files with 9 additions and 9 deletions.
10 changes: 7 additions & 3 deletions lightning_examples/finetuning-scheduler/finetuning-scheduler.py
@@ -255,6 +255,8 @@ def __init__(
             "num_workers": dataloader_kwargs.get("num_workers", 0),
             "pin_memory": dataloader_kwargs.get("pin_memory", False),
         }
+        # starting with HF Datasets v3.x, trust_remote_code must be `True` https://bit.ly/hf_datasets_trust_remote_req
+        self.trust_remote_code = True
         self.save_hyperparameters()
         os.environ["TOKENIZERS_PARALLELISM"] = "true" if self.hparams.tokenizers_parallelism else "false"
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -265,11 +267,13 @@ def prepare_data(self):
         """Load the SuperGLUE dataset."""
         # N.B. PL calls prepare_data from a single process (rank 0) so do not use it to assign
         # state (e.g. self.x=y)
-        datasets.load_dataset("super_glue", self.hparams.task_name)
+        datasets.load_dataset("super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code)

     def setup(self, stage):
         """Setup our dataset splits for training/validation."""
-        self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name)
+        self.dataset = datasets.load_dataset(
+            "super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code
+        )
         for split in self.dataset.keys():
             self.dataset[split] = self.dataset[split].map(
                 self._convert_to_features, batched=True, remove_columns=["label"]
@@ -385,7 +389,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0):
         self.log_dict(metric_dict, prog_bar=True)

     def configure_optimizers(self):
-        # With FTS >= 2.0, ``FinetuningScheduler`` simplifies initial optimizer configuration by ensuring the optimizer
+        # ``FinetuningScheduler`` simplifies initial optimizer configuration by ensuring the optimizer
         # configured here will optimize the parameters (and only those parameters) scheduled to be optimized in phase 0
         # of the current fine-tuning schedule. This auto-configuration can be disabled if desired by setting
         # ``enforce_phase0_params`` to ``False``.
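For readers skimming the diff, the following is a minimal, hypothetical sketch of the pattern that comment describes; the module, layer size, and hyperparameter names are illustrative only and not the tutorial's exact code.

import torch
import lightning.pytorch as pl


class SketchModule(pl.LightningModule):
    """Hypothetical minimal module, for illustration only."""

    def __init__(self, learning_rate: float = 1e-5):
        super().__init__()
        self.save_hyperparameters()
        self.layer = torch.nn.Linear(8, 2)

    def configure_optimizers(self):
        # With a FinetuningScheduler callback attached and ``enforce_phase0_params`` left at
        # its default (``True``), the parameters passed here are automatically narrowed to
        # those scheduled to be optimized in phase 0; no manual filtering is required.
        return torch.optim.AdamW(self.parameters(), lr=self.hparams.learning_rate)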
8 changes: 2 additions & 6 deletions lightning_examples/finetuning-scheduler/requirements.txt
@@ -1,6 +1,2 @@
-lightning # todo: the tuner depends on L so later using with PL crash
-finetuning-scheduler[examples] ==2.3.*
-datasets ==2.17.*
-# todo: pin version intill reinstall with PT-eco alignment
-torch ==2.1.*
-torchvision ==0.16.*
+datasets >=2.17.0 # to allow explicitly setting `trust_remote_code`
+finetuning-scheduler[examples] <=2.4.0
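As a quick sanity check (a sketch, not part of the tutorial), the `datasets` floor pinned above can be verified at runtime before relying on the explicit `trust_remote_code` argument:

# Sketch only: confirm the installed datasets version meets the >=2.17.0 floor pinned above.
import datasets
from packaging.version import Version

if Version(datasets.__version__) < Version("2.17.0"):
    raise RuntimeError(
        f"datasets {datasets.__version__} is older than the 2.17.0 floor this example pins "
        "for explicitly setting trust_remote_code"
    )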
