From a03e7cc4e443e30eea942ca66bfce19407784f32 Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Wed, 5 Jun 2024 11:00:19 -0400
Subject: [PATCH] Release 0.9.2 (#1697)

* Release: 0.9.0

* Release
---
 examples/scripts/ppo/ppo_zephyr.py   | 127 --------------------------
 examples/scripts/rloo/rloo_zephyr.py | 132 ---------------------------
 setup.py                             |   2 +-
 trl/__init__.py                      |   2 +-
 4 files changed, 2 insertions(+), 261 deletions(-)
 delete mode 100644 examples/scripts/ppo/ppo_zephyr.py
 delete mode 100644 examples/scripts/rloo/rloo_zephyr.py

diff --git a/examples/scripts/ppo/ppo_zephyr.py b/examples/scripts/ppo/ppo_zephyr.py
deleted file mode 100644
index 8c5d98e35e..0000000000
--- a/examples/scripts/ppo/ppo_zephyr.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import multiprocessing
-import shutil
-
-from datasets import load_dataset
-from transformers import (
-    AutoModelForCausalLM,
-    AutoModelForSequenceClassification,
-    AutoTokenizer,
-    HfArgumentParser,
-    PreTrainedModel,
-)
-
-from trl import ModelConfig
-from trl.trainer.ppov2_trainer import PPOv2Config, PPOv2Trainer
-
-
-"""
-python -i examples/scripts/ppo/ppo_zephyr.py \
-    --learning_rate 3e-6 \
-    --output_dir models/minimal/ppo \
-    --per_device_train_batch_size 1 \
-    --gradient_accumulation_steps 32 \
-    --total_episodes 10000 \
-    --model_name_or_path EleutherAI/pythia-1b-deduped \
-    --sft_model_path EleutherAI/pythia-1b-deduped \
-    --reward_model_path EleutherAI/pythia-1b-deduped \
-    --non_eos_penalty \
-    --stop_token eos \
-
-accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
-    examples/scripts/ppo/ppo_zephyr.py \
-    --output_dir models/minimal/ppo_zephyr10 \
-    --num_ppo_epochs 1 \
-    --num_mini_batches 1 \
-    --learning_rate 3e-6 \
-    --per_device_train_batch_size 1 \
-    --gradient_accumulation_steps 32 \
-    --total_episodes 200000 \
-    --model_name_or_path HuggingFaceH4/mistral-7b-sft-beta \
-    --sft_model_path HuggingFaceH4/mistral-7b-sft-beta \
-    --reward_model_path weqweasdas/RM-Mistral-7B \
-    --local_rollout_forward_batch_size 32 \
-    --deepspeed3 \
-    --kl_coef 0.10 \
-    --non_eos_penalty \
-    --stop_token eos \
-    --response_length 512 \
-"""
-
-if __name__ == "__main__":
-    parser = HfArgumentParser((PPOv2Config, ModelConfig))
-    config, model_config = parser.parse_args_into_dataclasses()
-    # remove output_dir if exists
-    shutil.rmtree(config.output_dir, ignore_errors=True)
-
-    ################
-    # Model & Tokenizer
-    ################
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_config.model_name_or_path,
-        padding_side="left",
-        trust_remote_code=True,
-    )
-    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-    if tokenizer.chat_template is None:
-        # a default chat template to simply concatenate the messages
-        tokenizer.chat_template = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
-    value_model: PreTrainedModel = AutoModelForSequenceClassification.from_pretrained(
-        config.reward_model_path,
-        num_labels=1,
-    )
-    reward_model: PreTrainedModel = AutoModelForSequenceClassification.from_pretrained(
-        config.reward_model_path,
-        num_labels=1,
-    )
-    ref_policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path)
-    policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path)
-    ################
-    # Dataset
-    ################
-    raw_datasets = load_dataset("HuggingFaceH4/ultrachat_200k")
-    if config.sanity_check:
-        for key in raw_datasets:
-            raw_datasets[key] = raw_datasets[key].select(range(1000))
-    train_dataset = raw_datasets["train_sft"]
-    eval_dataset = raw_datasets["test_sft"]
-
-    def prepare_dataset(dataset, tokenizer):
-        """pre-tokenize the dataset before training; only collate during training"""
-
-        def tokenize(element):
-            input_ids = tokenizer.apply_chat_template(
-                element["messages"][:1],
-                padding=False,
-                add_generation_prompt=True,
-            )
-            return {"input_ids": input_ids, "lengths": len(input_ids)}
-
-        return dataset.map(
-            tokenize,
-            remove_columns=dataset.column_names,
-            num_proc=1 if config.sanity_check else multiprocessing.cpu_count(),
-            load_from_cache_file=not config.sanity_check,
-        )
-
-    train_dataset = prepare_dataset(train_dataset, tokenizer)
-    eval_dataset = prepare_dataset(eval_dataset, tokenizer)
-    # filtering
-    train_dataset = train_dataset.filter(lambda x: x["lengths"] <= 1024)
-    eval_dataset = eval_dataset.filter(lambda x: x["lengths"] <= 1024)
-    ################
-    # Training
-    ################
-    trainer = PPOv2Trainer(
-        config=config,
-        tokenizer=tokenizer,
-        policy=policy,
-        ref_policy=ref_policy,
-        reward_model=reward_model,
-        value_model=value_model,
-        train_dataset=train_dataset,
-        eval_dataset=eval_dataset,
-    )
-    trainer.train()
-    trainer.save_model(config.output_dir)
-    trainer.push_to_hub()
-    trainer.generate_completions()
diff --git a/examples/scripts/rloo/rloo_zephyr.py b/examples/scripts/rloo/rloo_zephyr.py
deleted file mode 100644
index 0121abd395..0000000000
--- a/examples/scripts/rloo/rloo_zephyr.py
+++ /dev/null
@@ -1,132 +0,0 @@
-import multiprocessing
-import shutil
-
-from datasets import load_dataset
-from transformers import (
-    AutoModelForCausalLM,
-    AutoModelForSequenceClassification,
-    AutoTokenizer,
-    HfArgumentParser,
-    PreTrainedModel,
-)
-
-from trl import ModelConfig
-from trl.trainer.rloo_trainer import RLOOConfig, RLOOTrainer
-
-
-"""
-python -i examples/scripts/rloo/rloo_zephyr.py \
-    --learning_rate 3e-6 \
-    --output_dir models/minimal/rloo_zephyr \
-    --per_device_train_batch_size 64 \
-    --gradient_accumulation_steps 1 \
-    --total_episodes 10000 \
-    --model_name_or_path HuggingFaceH4/mistral-7b-sft-beta \
-    --sft_model_path HuggingFaceH4/mistral-7b-sft-beta \
-    --reward_model_path weqweasdas/RM-Mistral-7B \
-    --non_eos_penalty \
-    --stop_token eos \
-    --response_length 53 \
-    --sanity_check
-accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
-    examples/scripts/rloo/rloo_zephyr.py \
-    --num_ppo_epochs 1 \
-    --num_mini_batches 1 \
-    --rloo_k 2 \
-    --learning_rate 3e-6 \
-    --output_dir models/minimal/rloo_zephyr11 \
-    --per_device_train_batch_size 1 \
-    --gradient_accumulation_steps 32 \
-    --total_episodes 200000 \
-    --model_name_or_path HuggingFaceH4/mistral-7b-sft-beta \
-    --sft_model_path HuggingFaceH4/mistral-7b-sft-beta \
-    --reward_model_path weqweasdas/RM-Mistral-7B \
-    --local_rollout_forward_batch_size 32 \
-    --deepspeed3 \
-    --kl_coef 0.10 \
-    --non_eos_penalty \
-    --stop_token eos \
-    --response_length 512 \
-"""
-
-if __name__ == "__main__":
-    parser = HfArgumentParser((RLOOConfig, ModelConfig))
-    config, model_config = parser.parse_args_into_dataclasses()
-    # remove output_dir if exists
-    shutil.rmtree(config.output_dir, ignore_errors=True)
-
-    ################
-    # Model & Tokenizer
-    ################
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_config.model_name_or_path,
-        padding_side="left",
-        trust_remote_code=True,
-    )
-    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-    if tokenizer.chat_template is None:
-        # a default chat template to simply concatenate the messages
-        tokenizer.chat_template = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
-    value_model: PreTrainedModel = AutoModelForSequenceClassification.from_pretrained(
-        config.reward_model_path,
-        num_labels=1,
-    )
-    reward_model: PreTrainedModel = AutoModelForSequenceClassification.from_pretrained(
-        config.reward_model_path,
-        num_labels=1,
-    )
-    ref_policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path)
-    policy = AutoModelForCausalLM.from_pretrained(config.sft_model_path)
-    ################
-    # Dataset
-    ################
-    raw_datasets = load_dataset("HuggingFaceH4/ultrachat_200k")
-    if config.sanity_check:
-        for key in raw_datasets:
-            raw_datasets[key] = raw_datasets[key].select(range(1000))
-    train_dataset = raw_datasets["train_sft"]
-    eval_dataset = raw_datasets["test_sft"]
-    # train_dataset = train_dataset.select(range(1000))
-    # eval_dataset = eval_dataset.select(range(1000))
-
-    dataset_text_field = "prompt"
-
-    def prepare_dataset(dataset, tokenizer):
-        """pre-tokenize the dataset before training; only collate during training"""
-
-        def tokenize(element):
-            input_ids = tokenizer.apply_chat_template(
-                element["messages"][:1],
-                padding=False,
-                add_generation_prompt=True,
-            )
-            return {"input_ids": input_ids, "lengths": len(input_ids)}
-
-        return dataset.map(
-            tokenize,
-            remove_columns=dataset.column_names,
-            num_proc=1 if config.sanity_check else multiprocessing.cpu_count(),
-            load_from_cache_file=not config.sanity_check,
-        )
-
-    train_dataset = prepare_dataset(train_dataset, tokenizer)
-    eval_dataset = prepare_dataset(eval_dataset, tokenizer)
-    # filtering
-    train_dataset = train_dataset.filter(lambda x: x["lengths"] <= 1024)
-    eval_dataset = eval_dataset.filter(lambda x: x["lengths"] <= 1024)
-    ################
-    # Training
-    ################
-    trainer = RLOOTrainer(
-        config=config,
-        tokenizer=tokenizer,
-        policy=policy,
-        ref_policy=ref_policy,
-        reward_model=reward_model,
-        train_dataset=train_dataset,
-        eval_dataset=eval_dataset,
-    )
-    trainer.train()
-    trainer.save_model(config.output_dir)
-    trainer.push_to_hub()
-    trainer.generate_completions()
diff --git a/setup.py b/setup.py
index 0eb1c13efa..517b2cfcc8 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,7 @@
 from setuptools import find_packages, setup
 
-__version__ = "0.8.7.dev0"  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+__version__ = "0.9.2"  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
 
 REQUIRED_PKGS = [
     "torch>=1.4.0",
diff --git a/trl/__init__.py b/trl/__init__.py
index 6b33eca27f..f6875475d7 100644
--- a/trl/__init__.py
+++ b/trl/__init__.py
@@ -1,6 +1,6 @@
 # flake8: noqa
 
-__version__ = "0.8.7.dev0"
+__version__ = "0.9.2"
 
 from typing import TYPE_CHECKING
 from .import_utils import _LazyModule, is_diffusers_available, OptionalDependencyNotAvailable