
Commit 7058ca0
Merge branch 'main' into fix-deprecation-warning-message
qgallouedec authored Nov 25, 2024
2 parents f8a43ba + 453db5c
Showing 32 changed files with 482 additions and 275 deletions.
commands/run_dpo.sh (1 addition, 1 deletion)
@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_dpo/"
-MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
 DATASET_NAME="trl-internal-testing/hh-rlhf-helpful-base-trl-style"
 MAX_STEPS=5
 BATCH_SIZE=2
commands/run_sft.sh (1 addition, 1 deletion)
@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_sft/"
-MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
 DATASET_NAME="stanfordnlp/imdb"
 MAX_STEPS=5
 BATCH_SIZE=2
docs/source/clis.mdx (1 addition, 1 deletion)
@@ -23,7 +23,7 @@ We also recommend you passing a YAML config file to configure your training protocol
 
 ```yaml
 model_name_or_path:
-  trl-internal-testing/tiny-random-LlamaForCausalLM
+  Qwen/Qwen2.5-0.5B
 dataset_name:
   stanfordnlp/imdb
 report_to:
examples/cli_configs/example_config.yaml (1 addition, 1 deletion)
@@ -7,7 +7,7 @@
 # CUDA_VISIBLE_DEVICES: 0
 
 model_name_or_path:
-  trl-internal-testing/tiny-random-LlamaForCausalLM
+  Qwen/Qwen2.5-0.5B
 dataset_name:
   stanfordnlp/imdb
 report_to:
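A config file like the two above is consumed by the `trl` CLI. As a minimal sketch of the invocation, mirroring the subprocess pattern used in tests/test_cli.py further down (the `--config` flag is an assumption, not shown in this diff):

```python
import subprocess

# Launch SFT with the YAML config changed above (a sketch; assumes the CLI
# accepts a --config file in addition to individual flags).
subprocess.run(
    "trl sft --config examples/cli_configs/example_config.yaml",
    shell=True,
    check=True,
)
```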
scripts/generate_tiny_models.py (new file, 193 additions)
@@ -0,0 +1,193 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script generates tiny models used in the TRL library for unit tests. It pushes them to the Hub under the
# `trl-internal-testing` organization.
# This script is meant to be run when adding a new tiny model to the TRL library.

from huggingface_hub import HfApi, ModelCard
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    BartConfig,
    BartModel,
    BloomConfig,
    BloomForCausalLM,
    CLIPVisionConfig,
    CohereConfig,
    CohereForCausalLM,
    DbrxConfig,
    DbrxForCausalLM,
    FalconMambaConfig,
    FalconMambaForCausalLM,
    Gemma2Config,
    Gemma2ForCausalLM,
    GemmaConfig,
    GemmaForCausalLM,
    GPT2Config,
    GPT2LMHeadModel,
    GPTNeoXConfig,
    GPTNeoXForCausalLM,
    Idefics2Config,
    Idefics2ForConditionalGeneration,
    LlamaConfig,
    LlamaForCausalLM,
    LlavaConfig,
    LlavaForConditionalGeneration,
    LlavaNextConfig,
    LlavaNextForConditionalGeneration,
    MistralConfig,
    MistralForCausalLM,
    OPTConfig,
    OPTForCausalLM,
    PaliGemmaConfig,
    PaliGemmaForConditionalGeneration,
    Phi3Config,
    Phi3ForCausalLM,
    Qwen2Config,
    Qwen2ForCausalLM,
    SiglipVisionConfig,
    T5Config,
    T5ForConditionalGeneration,
)
from transformers.models.idefics2.configuration_idefics2 import Idefics2VisionConfig


ORGANIZATION = "trl-internal-testing"

MODEL_CARD = """
---
library_name: transformers
tags: [trl]
---
# Tiny {model_class_name}
This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
"""


api = HfApi()


def push_to_hub(model, tokenizer, suffix=None):
    model_class_name = model.__class__.__name__
    content = MODEL_CARD.format(model_class_name=model_class_name)
    model_card = ModelCard(content)
    repo_id = f"{ORGANIZATION}/tiny-{model_class_name}"
    if suffix is not None:
        repo_id += f"-{suffix}"

    if api.repo_exists(repo_id):
        print(f"Model {repo_id} already exists, skipping")
    else:
        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
        model_card.push_to_hub(repo_id)


# Decoder models
for model_id, config_class, model_class, suffix in [
    ("bigscience/bloomz-560m", BloomConfig, BloomForCausalLM, None),
    ("CohereForAI/aya-expanse-8b", CohereConfig, CohereForCausalLM, None),
    ("databricks/dbrx-instruct", DbrxConfig, DbrxForCausalLM, None),
    ("tiiuae/falcon-7b-instruct", FalconMambaConfig, FalconMambaForCausalLM, None),
    ("google/gemma-2-2b-it", Gemma2Config, Gemma2ForCausalLM, None),
    ("google/gemma-7b-it", GemmaConfig, GemmaForCausalLM, None),
    ("openai-community/gpt2", GPT2Config, GPT2LMHeadModel, None),
    ("EleutherAI/pythia-14m", GPTNeoXConfig, GPTNeoXForCausalLM, None),
    ("meta-llama/Meta-Llama-3-8B-Instruct", LlamaConfig, LlamaForCausalLM, "3"),
    ("meta-llama/Llama-3.1-8B-Instruct", LlamaConfig, LlamaForCausalLM, "3.1"),
    ("meta-llama/Llama-3.2-1B-Instruct", LlamaConfig, LlamaForCausalLM, "3.2"),
    ("mistralai/Mistral-7B-Instruct-v0.1", MistralConfig, MistralForCausalLM, "0.1"),
    ("mistralai/Mistral-7B-Instruct-v0.2", MistralConfig, MistralForCausalLM, "0.2"),
    ("facebook/opt-1.3b", OPTConfig, OPTForCausalLM, None),
    ("microsoft/Phi-3.5-mini-instruct", Phi3Config, Phi3ForCausalLM, None),
    ("Qwen/Qwen2.5-32B-Instruct", Qwen2Config, Qwen2ForCausalLM, "2.5"),
    ("Qwen/Qwen2.5-Coder-0.5B", Qwen2Config, Qwen2ForCausalLM, "2.5-Coder"),
]:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = config_class(
        vocab_size=tokenizer.vocab_size + len(tokenizer.added_tokens_encoder.keys()),
        hidden_size=8,
        num_attention_heads=4,
        num_key_value_heads=2,
        num_hidden_layers=2,
        intermediate_size=32,
    )
    model = model_class(config)
    push_to_hub(model, tokenizer, suffix)


# Encoder-decoder models
for model_id, config_class, model_class, suffix in [
    ("google/flan-t5-small", T5Config, T5ForConditionalGeneration, None),
    ("facebook/bart-base", BartConfig, BartModel, None),
]:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = config_class(
        vocab_size=tokenizer.vocab_size + len(tokenizer.added_tokens_encoder.keys()),
        d_model=16,
        encoder_layers=2,
        decoder_layers=2,
        d_kv=2,
        d_ff=64,
        num_layers=6,
        num_heads=8,
        decoder_start_token_id=0,
        is_encoder_decoder=True,
    )
    model = model_class(config)
    push_to_hub(model, tokenizer, suffix)


# Vision Language Models
# fmt: off
for model_id, config_class, text_config_class, vision_config_class, model_class in [
    ("HuggingFaceM4/idefics2-8b", Idefics2Config, MistralConfig, Idefics2VisionConfig, Idefics2ForConditionalGeneration),
    ("llava-hf/llava-1.5-7b-hf", LlavaConfig, LlamaConfig, CLIPVisionConfig, LlavaForConditionalGeneration),
    ("llava-hf/llava-v1.6-mistral-7b-hf", LlavaNextConfig, MistralConfig, CLIPVisionConfig, LlavaNextForConditionalGeneration),
    ("google/paligemma-3b-pt-224", PaliGemmaConfig, GemmaConfig, SiglipVisionConfig, PaliGemmaForConditionalGeneration),
]:
    # fmt: on
    processor = AutoProcessor.from_pretrained(model_id)
    kwargs = {}
    if config_class == PaliGemmaConfig:
        kwargs["projection_dim"] = 8
    vision_kwargs = {}
    if vision_config_class in [CLIPVisionConfig, SiglipVisionConfig]:
        vision_kwargs["projection_dim"] = 8
    if vision_config_class == CLIPVisionConfig:
        vision_kwargs["image_size"] = 336
        vision_kwargs["patch_size"] = 14
    config = config_class(
        text_config=text_config_class(
            vocab_size=processor.tokenizer.vocab_size + len(processor.tokenizer.added_tokens_encoder),
            hidden_size=8,
            num_attention_heads=4,
            num_key_value_heads=2,
            num_hidden_layers=2,
            intermediate_size=32,
        ),
        vision_config=vision_config_class(
            hidden_size=8,
            num_attention_heads=4,
            num_hidden_layers=2,
            intermediate_size=32,
            **vision_kwargs,
        ),
        **kwargs,
    )
    model = model_class(config)
    push_to_hub(model, processor)
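Once generated and pushed, these checkpoints load like any other Hub model. A minimal usage sketch (the repo id appears throughout this commit; the sample sentence is arbitrary):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load one of the generated tiny checkpoints; with hidden_size=8 and
# num_hidden_layers=2 it instantiates almost instantly, which is the point for CI.
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("Beautiful is better than ugly.", return_tensors="pt")
logits = model(**inputs).logits  # weights are random; only shapes and plumbing are meaningful
print(logits.shape)  # (1, sequence_length, vocab_size)
```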
tests/slow/testing_constants.py (2 additions, 2 deletions)
@@ -14,8 +14,8 @@
 
 # TODO: push them under trl-org
 MODELS_TO_TEST = [
-    "trl-internal-testing/tiny-random-LlamaForCausalLM",
-    "HuggingFaceM4/tiny-random-MistralForCausalLM",
+    "trl-internal-testing/tiny-LlamaForCausalLM-3.2",
+    "trl-internal-testing/tiny-MistralForCausalLM-0.2",
 ]
 
 # We could have also not declared these variables but let's be verbose
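A sketch of how a slow test might iterate over this list (the decorator pattern is an assumption, mirroring test_bco_trainer.py's use of parameterized below):

```python
import unittest

from parameterized import parameterized
from transformers import AutoModelForCausalLM

from tests.slow.testing_constants import MODELS_TO_TEST


class SlowModelLoadTester(unittest.TestCase):
    # Run the same check once per tiny model in the constant list (sketch).
    @parameterized.expand([(model_id,) for model_id in MODELS_TO_TEST])
    def test_model_loads(self, model_id):
        model = AutoModelForCausalLM.from_pretrained(model_id)
        self.assertIsNotNone(model)
```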
tests/test_bco_trainer.py (17 additions, 19 deletions)
@@ -30,30 +30,30 @@
 
 class BCOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
 
         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
 
         # get embedding model
-        model_id = "facebook/bart-base"
+        model_id = "trl-internal-testing/tiny-BartModel"
         self.embedding_model = AutoModel.from_pretrained(model_id)
         self.embedding_tokenizer = AutoTokenizer.from_pretrained(model_id)
 
     @parameterized.expand(
         [
-            ["gpt2", True, True, "standard_unpaired_preference"],
-            ["gpt2", True, False, "standard_unpaired_preference"],
-            ["gpt2", False, True, "standard_unpaired_preference"],
-            ["gpt2", False, False, "standard_unpaired_preference"],
-            ["gpt2", True, True, "conversational_unpaired_preference"],
+            ("qwen", True, True, "standard_unpaired_preference"),
+            ("qwen", True, False, "standard_unpaired_preference"),
+            ("qwen", False, True, "standard_unpaired_preference"),
+            ("qwen", False, False, "standard_unpaired_preference"),
+            ("qwen", True, True, "conversational_unpaired_preference"),
         ]
     )
     @require_sklearn
@@ -73,7 +73,7 @@ def test_bco_trainer(self, name, pre_compute, eval_dataset, config_name):
 
         dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)
 
-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             ref_model = self.ref_model
             tokenizer = self.tokenizer
@@ -160,9 +160,9 @@ def test_tokenize_and_process_tokens(self):
         self.assertListEqual(tokenized_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(tokenized_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(tokenized_dataset["label"], train_dataset["label"])
-        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [5377, 11141])
-        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1, 1])
-        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [318, 1365, 621, 8253, 13])
+        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1])
+        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [374, 2664, 1091, 16965, 13])
         self.assertListEqual(tokenized_dataset["answer_attention_mask"][0], [1, 1, 1, 1, 1])
 
         fn_kwargs = {
@@ -178,15 +178,13 @@ def test_tokenize_and_process_tokens(self):
         self.assertListEqual(processed_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(processed_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(processed_dataset["label"], train_dataset["label"])
-        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [50256, 5377, 11141])
-        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1, 1, 1])
+        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1])
         self.assertListEqual(
-            processed_dataset["completion_input_ids"][0], [50256, 5377, 11141, 318, 1365, 621, 8253, 13, 50256]
-        )
-        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1, 1, 1])
-        self.assertListEqual(
-            processed_dataset["completion_labels"][0], [-100, -100, -100, 318, 1365, 621, 8253, 13, 50256]
+            processed_dataset["completion_input_ids"][0], [31137, 374, 2664, 1091, 16965, 13, 151645]
         )
+        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1])
+        self.assertListEqual(processed_dataset["completion_labels"][0], [-100, 374, 2664, 1091, 16965, 13, 151645])
 
     @require_sklearn
     def test_bco_trainer_without_providing_ref_model(self):
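The hard-coded token ids above changed because the fixture model's tokenizer moved from GPT-2 to Qwen2. A sketch of how such expected values can be recomputed rather than hand-copied (dataset and field names come from the test itself; whether special tokens are added at this stage is an assumption):

```python
from datasets import load_dataset
from transformers import AutoTokenizer

# Recompute expected prompt/completion ids for the new tokenizer (sketch).
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
print(tokenizer(dataset["prompt"][0], add_special_tokens=False)["input_ids"])
print(tokenizer(dataset["completion"][0], add_special_tokens=False)["input_ids"])
```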
tests/test_best_of_n_sampler.py (1 addition, 1 deletion)
@@ -31,7 +31,7 @@ class BestOfNSamplerTester(unittest.TestCase):
     Tests the BestOfNSampler class
     """
 
-    ref_model_name = "trl-internal-testing/dummy-GPT2-correct-vocab"
+    ref_model_name = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
     output_length_sampler = LengthSampler(2, 6)
     model = AutoModelForCausalLMWithValueHead.from_pretrained(ref_model_name)
     tokenizer = AutoTokenizer.from_pretrained(ref_model_name)
tests/test_callbacks.py (7 additions, 7 deletions)
@@ -60,9 +60,9 @@ def __init__(self, model, ref_model, args, train_dataset, eval_dataset, processi
 
 class WinRateCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -219,8 +219,8 @@ def test_lora(self):
 @require_wandb
 class LogCompletionsCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -283,8 +283,8 @@ def test_basic(self):
 )
 class MergeModelCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
 
     def test_callback(self):
tests/test_cli.py (2 additions, 2 deletions)
@@ -21,7 +21,7 @@ class CLITester(unittest.TestCase):
     def test_sft_cli(self):
         try:
             subprocess.run(
-                "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name stanfordnlp/imdb --learning_rate 1e-4 --lr_scheduler_type cosine",
+                "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name stanfordnlp/imdb --learning_rate 1e-4 --lr_scheduler_type cosine",
                 shell=True,
                 check=True,
             )
@@ -32,7 +32,7 @@ def test_sft_cli(self):
     def test_dpo_cli(self):
         try:
             subprocess.run(
-                "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
+                "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
                 shell=True,
                 check=True,
            )
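For comparison, a rough Python-API equivalent of the `trl sft` command exercised above (a sketch: argument names mirror the CLI flags and TRL's SFTConfig/SFTTrainer, which accept a model id string; all other defaults are left untouched):

```python
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

# Same tiny run as the CLI test, expressed through the Python API (sketch).
dataset = load_dataset("stanfordnlp/imdb", split="train")
training_args = SFTConfig(
    output_dir="tmp-sft",
    max_steps=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
)
trainer = SFTTrainer(
    model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```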
(The remaining 22 changed files are not shown here.)
