tests/models/aya_vision/test_modeling_aya_vision.py (1 addition, 1 deletion)
@@ -71,7 +71,7 @@ def __init__(
     "vocab_size": 99,
     "hidden_size": 128,
     "intermediate_size": 37,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_attention_heads": 4,
     "output_channels": 64,
     "hidden_act": "silu",
tests/models/bamba/test_modeling_bamba.py (1 addition, 1 deletion)
@@ -73,7 +73,7 @@ def __init__(
     use_labels=True,
     vocab_size=99,
     hidden_size=32,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
     num_attention_heads=4,
     num_key_value_heads=2,
     intermediate_size=64,
tests/models/bitnet/test_modeling_bitnet.py (1 addition, 1 deletion)
@@ -49,7 +49,7 @@ def __init__(
     use_input_mask=True,
     vocab_size=99,
     hidden_size=64,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     num_key_value_heads=2,
     intermediate_size=37,
tests/models/bros/test_modeling_bros.py (1 addition, 1 deletion)
@@ -49,7 +49,7 @@ def __init__(
     use_labels=True,
     vocab_size=99,
     hidden_size=64,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     intermediate_size=37,
     hidden_act="gelu",
tests/models/cohere/test_modeling_cohere.py (1 addition, 1 deletion)
@@ -54,7 +54,7 @@ def __init__(
     use_labels=True,
     vocab_size=99,
     hidden_size=32,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
     num_attention_heads=4,
     intermediate_size=37,
     hidden_act="gelu",
(file path not captured in this view) (1 addition, 1 deletion)
@@ -65,7 +65,7 @@ def __init__(
     "vocab_size": 99,
     "hidden_size": 128,
     "intermediate_size": 37,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_attention_heads": 4,
     "output_channels": 64,
     "hidden_act": "silu",
tests/models/deepseek_v3/test_modeling_deepseek_v3.py (1 addition, 1 deletion)
@@ -65,7 +65,7 @@ def __init__(
     hidden_size=32,
     intermediate_size=37,
     moe_intermediate_size=12,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     num_key_value_heads=4,
     n_shared_experts=1,
tests/models/eomt/test_modeling_eomt.py (1 addition, 1 deletion)
@@ -47,7 +47,7 @@ def __init__(
     num_labels=4,
     hidden_size=8,
     num_attention_heads=2,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
 ):
     self.parent = parent
     self.batch_size = batch_size
tests/models/falcon/test_modeling_falcon.py (1 addition, 1 deletion)
@@ -208,7 +208,7 @@ def test_falcon_alibi_sdpa_matches_eager(self):
     config = FalconConfig(
         vocab_size=1000,
         hidden_size=64,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         new_decoder_architecture=True,
         alibi=True,
tests/models/falcon_h1/test_modeling_falcon_h1.py (1 addition, 1 deletion)
@@ -55,7 +55,7 @@ def __init__(
     use_labels=True,
     vocab_size=99,
     hidden_size=32,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
     num_attention_heads=4,
     num_key_value_heads=2,
     intermediate_size=64,
tests/models/got_ocr2/test_modeling_got_ocr2.py (1 addition, 1 deletion)
@@ -59,7 +59,7 @@ def __init__(
     "vocab_size": 99,
     "hidden_size": 128,
     "intermediate_size": 37,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_attention_heads": 4,
     "num_key_value_heads": 2,
     "output_channels": 64,
tests/models/idefics/test_modeling_idefics.py (2 additions, 2 deletions)
@@ -67,7 +67,7 @@ def __init__(
     use_labels=True,
     vocab_size=99,
     hidden_size=32,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     intermediate_size=37,
     hidden_act="gelu",
@@ -85,7 +85,7 @@ def __init__(
     vision_patch_size=2,
     vision_image_size=30,
     vision_num_attention_heads=4,
-    vision_num_hidden_layers=5,
+    vision_num_hidden_layers=2,
     vision_intermediate_size=37,
     perceiver_qk_layer_norms_perceiver=False,
     perceiver_resampler_depth=2,
tests/models/idefics2/test_modeling_idefics2.py (1 addition, 1 deletion)
@@ -86,7 +86,7 @@ def __init__(
     "vocab_size": 100,
     "hidden_size": 64,
     "intermediate_size": 56,
-    "num_hidden_layers": 3,
+    "num_hidden_layers": 2,
     "num_attention_heads": 2,
     "num_key_value_heads": 2,
     "hidden_act": "silu",
tests/models/idefics3/test_modeling_idefics3.py (1 addition, 1 deletion)
@@ -74,7 +74,7 @@ def __init__(
     "vocab_size": 100,
     "hidden_size": 64,
     "intermediate_size": 56,
-    "num_hidden_layers": 3,
+    "num_hidden_layers": 2,
     "num_attention_heads": 2,
     "num_key_value_heads": 2,
     "hidden_act": "silu",
tests/models/internvl/test_modeling_internvl.py (1 addition, 1 deletion)
@@ -74,7 +74,7 @@ def __init__(
     "vocab_size": 99,
     "hidden_size": 128,
     "intermediate_size": 37,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_attention_heads": 4,
     "num_key_value_heads": 2,
     "output_channels": 64,
tests/models/longcat_flash/test_modeling_longcat_flash.py (2 additions, 2 deletions)
@@ -60,7 +60,7 @@ def __init__(
     hidden_size=144,
     ffn_hidden_size=288,
     expert_ffn_hidden_size=48,
-    num_layers=2,
+    num_layers=1,  # We have `self.num_hidden_layers = 2 * num_layers` in the body. See `LongcatFlashConfig`.
     num_attention_heads=8,
     num_key_value_heads=8,
     kv_lora_rank=16,
@@ -96,7 +96,7 @@ def __init__(
     self.expert_ffn_hidden_size = expert_ffn_hidden_size
     self.num_layers = num_layers
     self.num_hidden_layers = 2 * num_layers  # for compatibility
-    self.expected_num_hidden_layers = 3  # embedding + 2 layers
+    self.expected_num_hidden_layers = 2  # embedding + 2 layers
     self.num_attention_heads = num_attention_heads
     self.num_key_value_heads = num_key_value_heads
     self.kv_lora_rank = kv_lora_rank
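The inline comment above is the key detail for this file: the LongcatFlash tester doubles `num_layers` when deriving `num_hidden_layers`, so the constructor argument drops to 1 to land on two hidden layers. A minimal sketch of that relationship, using a hypothetical stand-in class and assuming only what the diff itself shows:

# Hypothetical stand-in illustrating the doubling noted in the diff above;
# not the real LongcatFlash model tester.
class LongcatTesterSketch:
    def __init__(self, num_layers=1):
        self.num_layers = num_layers
        # Mirrors `self.num_hidden_layers = 2 * num_layers` from the tester body.
        self.num_hidden_layers = 2 * num_layers

assert LongcatTesterSketch(num_layers=1).num_hidden_layers == 2  # fits the new budget
assert LongcatTesterSketch(num_layers=2).num_hidden_layers == 4  # the old, too-deep setup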
tests/models/lxmert/test_modeling_lxmert.py (1 addition, 1 deletion)
@@ -59,7 +59,7 @@ def __init__(
     num_object_labels=16,
     num_attr_labels=4,
     num_visual_features=10,
-    l_layers=2,
+    l_layers=1,
     x_layers=1,
     r_layers=1,
     visual_feat_dim=128,
tests/models/mllama/test_modeling_mllama.py (2 additions, 2 deletions)
@@ -145,7 +145,7 @@ def __init__(
     "model_type": "mllama",
     "vocab_size": 99,
     "hidden_size": 32,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_attention_heads": 4,
     "num_key_value_heads": 4,
     "intermediate_size": 37,
@@ -166,7 +166,7 @@ def __init__(
     "intermediate_layers_indices": [0],
     "vision_output_dim": 32,
     "projection_dim": 32,
-    "num_hidden_layers": 6,
+    "num_hidden_layers": 2,
     "num_global_layers": 2,
     "num_attention_heads": 4,
     "intermediate_size": 37,
tests/models/pop2piano/test_modeling_pop2piano.py (1 addition, 1 deletion)
@@ -57,7 +57,7 @@ def __init__(
     use_attention_mask=True,
     use_labels=True,
     hidden_size=64,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     d_ff=37,
     relative_attention_num_buckets=8,
tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py (1 addition, 1 deletion)
@@ -99,7 +99,7 @@ def __init__(
     "vocab_size": 99,
     "hidden_size": 32,
     "intermediate_size": 37,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_attention_heads": 4,
     "num_key_value_heads": 2,
     "hidden_act": "silu",
tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py (1 addition, 1 deletion)
@@ -85,7 +85,7 @@ def __init__(
     max_window_layers=3,
     model_type="qwen2_5_vl",
     num_attention_heads=4,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
     num_key_value_heads=2,
     rope_theta=10000,
     tie_word_embeddings=True,
tests/models/qwen2_vl/test_modeling_qwen2_vl.py (1 addition, 1 deletion)
@@ -79,7 +79,7 @@ def __init__(
     max_window_layers=3,
     model_type="qwen2_vl",
     num_attention_heads=4,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
     num_key_value_heads=2,
     rope_theta=10000,
     tie_word_embeddings=True,
tests/models/qwen3_vl/test_modeling_qwen3_vl.py (1 addition, 1 deletion)
@@ -61,7 +61,7 @@ def __init__(
     "max_position_embeddings": 512,
     "model_type": "qwen3_vl",
     "num_attention_heads": 4,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "num_key_value_heads": 2,
     "rope_theta": 10000,
     "tie_word_embeddings": True,
tests/models/qwen3_vl_moe/test_modeling_qwen3_vl_moe.py (1 addition, 1 deletion)
@@ -61,7 +61,7 @@ def __init__(
     "model_type": "qwen3_vl_moe",
     "num_attention_heads": 4,
     "num_key_value_heads": 2,
-    "num_hidden_layers": 4,
+    "num_hidden_layers": 2,
     "moe_intermediate_size": 16,
     "num_experts_per_tok": 4,
     "num_experts": 8,
tests/models/reformer/test_modeling_reformer.py (1 addition, 1 deletion)
@@ -83,7 +83,7 @@ def __init__(
     axial_pos_embds=True,
     axial_pos_shape=[4, 8],
     axial_pos_embds_dim=[16, 16],
-    attn_layers=["local", "local", "local", "local"],
+    attn_layers=["local", "local"],
     pad_token_id=0,
     eos_token_id=2,
     scope=None,
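Reformer is the odd one out in this PR: its depth comes from the length of `attn_layers` (a list of "local"/"lsh" entries) rather than an integer `num_hidden_layers` argument, so the reduction takes the form of a shorter list. A quick sketch, assuming the standard `transformers` config API:

from transformers import ReformerConfig

# Two "local" entries give a 2-layer Reformer; the list length sets the
# depth, matching the change above.
config = ReformerConfig(attn_layers=["local", "local"])
print(len(config.attn_layers))  # 2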
tests/models/smolvlm/test_modeling_smolvlm.py (1 addition, 1 deletion)
@@ -77,7 +77,7 @@ def __init__(
     "vocab_size": 100,
     "hidden_size": 64,
     "intermediate_size": 56,
-    "num_hidden_layers": 3,
+    "num_hidden_layers": 2,
     "num_attention_heads": 2,
     "num_key_value_heads": 2,
     "hidden_act": "silu",
tests/models/udop/test_modeling_udop.py (2 additions, 2 deletions)
@@ -55,7 +55,7 @@ def __init__(
     use_attention_mask=True,
     use_labels=True,
     hidden_size=32,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     d_ff=37,
     relative_attention_num_buckets=32,
@@ -425,7 +425,7 @@ def __init__(
     is_training=False,
     use_attention_mask=True,
     hidden_size=32,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     decoder_layers=2,
     num_attention_heads=4,
     d_ff=37,
tests/models/vitpose/test_modeling_vitpose.py (1 addition, 1 deletion)
@@ -51,7 +51,7 @@ def __init__(
     is_training=True,
     use_labels=True,
     hidden_size=32,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     intermediate_size=37,
     hidden_act="gelu",
(file path not captured in this view) (1 addition, 1 deletion)
@@ -44,7 +44,7 @@ def __init__(
     is_training=True,
     use_labels=True,
     hidden_size=32,
-    num_hidden_layers=5,
+    num_hidden_layers=2,
     num_attention_heads=4,
     intermediate_size=37,
     hidden_act="gelu",
tests/models/vjepa2/test_modeling_vjepa2.py (1 addition, 1 deletion)
@@ -61,7 +61,7 @@ def __init__(
     patch_size=16,
     num_channels=3,
     hidden_size=32,
-    num_hidden_layers=4,
+    num_hidden_layers=2,
     num_attention_heads=2,
     num_frames=2,
     mlp_ratio=1,
tests/models/xlnet/test_modeling_xlnet.py (1 addition, 1 deletion)
@@ -80,7 +80,7 @@ def __init__(
     self.hidden_size = 32
     self.num_attention_heads = 4
     self.d_inner = 128
-    self.num_hidden_layers = 5
+    self.num_hidden_layers = 3
     self.type_sequence_label_size = 2
     self.bi_data = False
     self.same_length = False
tests/test_modeling_common.py (39 additions, 0 deletions)
@@ -674,6 +674,45 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
 
         return inputs_dict
 
+    def test_num_layers_is_small(self):
+        # TODO (if possible): Avoid exceptional cases, especially for `OwlViT`.
+        # ⛔ DO NOT edit this list (unless there is really nothing to tweak in the model tester class and it is approved by the reviewer) ⛔!
+        exceptional_num_hidden_layers = {
+            # TODO: There might be some way to fix these
+            "FunnelModelTest": 5,
+            "FunnelBaseModelTest": 4,
+            "GroupViTVisionModelTest": 12,
+            "OwlViTModelTest": 12,
+            "OwlViTTextModelTest": 12,
+            "OwlViTForObjectDetectionTest": 12,
+            "Owlv2ModelTest": 12,
+            "Owlv2TextModelTest": 12,
+            "Owlv2ForObjectDetectionTest": 12,
+            "SamHQModelTest": 12,
+            "Swin2SRModelTest": 3,
+            "XLNetModelTest": 3,
+            "DPTModelTest": 4,  # `test_modeling_dpt_hybrid.py`: not able to get it to work after changing `num_hidden_layers` and `neck_hidden_sizes`
+            # Nothing we can do
+            "Gemma3nTextModelTest": 4,  # needs to test KV shared layers for both types: `full_attention` and `sliding_attention`
+            "BeitModelTest": 4,  # BeitForSemanticSegmentation requires config.out_indices to be a list of 4 integers
+            "ZambaModelTest": 5,  # the minimum number to test beyond the initial ["mamba", "mamba", "hybrid"] in `ZambaConfig._layers_block_type`
+        }
+        target_num_hidden_layers = exceptional_num_hidden_layers.get(type(self).__name__, 2)
+
+        if hasattr(self.model_tester, "num_hidden_layers") and isinstance(self.model_tester.num_hidden_layers, int):
+            assert self.model_tester.num_hidden_layers <= target_num_hidden_layers
+
+        if hasattr(self.model_tester, "vision_config") and "num_hidden_layers" in self.model_tester.vision_config:
+            if isinstance(self.model_tester.vision_config, dict):
+                assert self.model_tester.vision_config["num_hidden_layers"] <= target_num_hidden_layers
+            else:
+                assert self.model_tester.vision_config.num_hidden_layers <= target_num_hidden_layers
+        if hasattr(self.model_tester, "text_config") and "num_hidden_layers" in self.model_tester.text_config:
+            if isinstance(self.model_tester.text_config, dict):
+                assert self.model_tester.text_config["num_hidden_layers"] <= target_num_hidden_layers
+            else:
+                assert self.model_tester.text_config.num_hidden_layers <= target_num_hidden_layers
+
     def test_save_load(self):
         def check_save_load(out1, out2):
             # make sure we don't have nans

Review comments from the PR author:
- On the `"GroupViTVisionModelTest": 12` entry: 😭
- On lines +677 to +715 (the whole new test): "This is the point of this PR"
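To summarize what the new test enforces: each model tester's layer counts (the top-level `num_hidden_layers` plus any nested `vision_config`/`text_config`) must stay within a budget of 2 layers, unless the test class appears in the exception list. A standalone sketch of that logic, using a hypothetical `DummyTester` and an abbreviated exception table rather than the real `ModelTesterMixin` machinery:

# Standalone sketch of the check added above; `DummyTester`, `EXCEPTIONS`,
# and `check_num_layers_is_small` are illustrative stand-ins.
EXCEPTIONS = {"XLNetModelTest": 3}  # abbreviated from the list in the diff

class DummyTester:
    num_hidden_layers = 2
    vision_config = {"num_hidden_layers": 2}
    text_config = {"num_hidden_layers": 2}

def check_num_layers_is_small(test_name: str, tester) -> None:
    # Default budget is 2 hidden layers; listed tests get a higher cap.
    target = EXCEPTIONS.get(test_name, 2)
    if isinstance(getattr(tester, "num_hidden_layers", None), int):
        assert tester.num_hidden_layers <= target
    # Nested configs (vision/text) are checked the same way when present.
    for attr in ("vision_config", "text_config"):
        cfg = getattr(tester, attr, None)
        if isinstance(cfg, dict) and "num_hidden_layers" in cfg:
            assert cfg["num_hidden_layers"] <= target

check_num_layers_is_small("DummyModelTest", DummyTester())  # passes: 2 <= 2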