2 changes: 1 addition & 1 deletion src/transformers/models/gemma3n/configuration_gemma3n.py
@@ -502,10 +502,10 @@ def __init__(
**kwargs,
):
super().__init__(**kwargs)
self.architecture = architecture
self.initializer_range = initializer_range
self.do_pooling = do_pooling
self.model_args = model_args # named "model_args" for BC with timm
self.architecture = architecture
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.vocab_offset = vocab_offset
@@ -41,6 +41,8 @@ class TimmWrapperConfig(PretrainedConfig):
imagenet models is set to `None` due to occlusions in the label descriptions.

Args:
architecture (`str`, *optional*, defaults to `"resnet50"`):
The timm architecture to load.
Comment on lines 43 to 45

Member Author:

cc @qubvel on these changes! TimmWrapperConfig does not have an architecture field even though it's required in modeling - was it intentional?

@qubvel (Contributor), Sep 18, 2025:

@Cyrilvallez looks like it's indeed missing! I probably mixed it up with architectureS in a base class.

Member Author:

Alright, perfect - thanks for double-checking!

initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
do_pooling (`bool`, *optional*, defaults to `True`):
@@ -65,11 +67,13 @@ class TimmWrapperConfig(PretrainedConfig):

def __init__(
self,
architecture: str = "resnet50",
initializer_range: float = 0.02,
do_pooling: bool = True,
model_args: Optional[dict[str, Any]] = None,
**kwargs,
):
self.architecture = architecture
self.initializer_range = initializer_range
self.do_pooling = do_pooling
self.model_args = model_args # named "model_args" for BC with timm
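Side note on the config change above: with `architecture` now stored on the config, a tiny wrapper config can be built directly from kwargs instead of loading a pretrained checkpoint. A minimal sketch, not part of the diff - it assumes timm is installed and that the `resnet26` builder accepts a `channels` override, as used in the updated timm_wrapper test later in this PR:

```python
from transformers import TimmWrapperConfig

# Build a small config entirely from kwargs - no Hub download needed.
config = TimmWrapperConfig(
    architecture="resnet26",                    # timm architecture to instantiate
    model_args={"channels": (16, 16, 16, 16)},  # shrink the backbone for fast tests
)

# The field is now a regular config attribute, so it round-trips through
# save_pretrained / from_pretrained like any other value.
assert config.architecture == "resnet26"
```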
10 changes: 1 addition & 9 deletions tests/models/chameleon/test_modeling_chameleon.py
@@ -76,7 +76,7 @@ def __init__(
pad_token_id=0,
vq_num_embeds=5,
vq_embed_dim=5,
vq_channel_multiplier=[1, 4],
vq_channel_multiplier=[1, 2],
vq_img_token_start_id=10, # has to be less than vocab size when added with vq_num_embeds
scope=None,
):
@@ -255,10 +255,6 @@ def test_model_rope_scaling(self, scaling_type):
def test_batching_equivalence(self):
pass

@unittest.skip("Chameleon VQ model cannot be squishes more due to hardcoded layer params in model code")
def test_model_is_small(self):
pass


class ChameleonVision2SeqModelTester(ChameleonModelTester):
def __init__(self, parent, image_size=10, **kwargs):
@@ -321,10 +317,6 @@ def test_disk_offload_bin(self):
def test_disk_offload_safetensors(self):
pass

@unittest.skip("Chameleon VQ model cannot be squishes more due to hardcoded layer params in model code")
def test_model_is_small(self):
pass

@unittest.skip("Chameleon applies key/query norm which doesn't work with packing")
def test_flash_attention_2_padding_matches_padding_free_with_position_ids(self):
pass
4 changes: 0 additions & 4 deletions tests/models/emu3/test_modeling_emu3.py
@@ -359,10 +359,6 @@ def test_initialization(self):
def test_generate_with_static_cache(self):
pass

# @unittest.skip("Emu3 can't be smaller than currently if we want to downsample images")
# def test_model_is_small(self):
# pass


@require_torch
class Emu3IntegrationTest(unittest.TestCase):
20 changes: 10 additions & 10 deletions tests/models/layoutlmv2/test_modeling_layoutlmv2.py
@@ -70,7 +70,7 @@ def __init__(
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
image_feature_pool_shape=[7, 7, 256],
image_feature_pool_shape=[7, 7, 32],
coordinate_size=6,
shape_size=6,
num_labels=3,
@@ -106,6 +106,14 @@ def __init__(
self.num_choices = num_choices
self.scope = scope
self.range_bbox = range_bbox
detectron2_config = LayoutLMv2Config.get_default_detectron2_config()
# We need to make the model smaller
detectron2_config["MODEL.RESNETS.DEPTH"] = 50
detectron2_config["MODEL.RESNETS.RES2_OUT_CHANNELS"] = 4
detectron2_config["MODEL.RESNETS.STEM_OUT_CHANNELS"] = 4
detectron2_config["MODEL.FPN.OUT_CHANNELS"] = 32
detectron2_config["MODEL.RESNETS.NUM_GROUPS"] = 1
self.detectron2_config = detectron2_config

def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -158,13 +166,9 @@ def prepare_config_and_inputs(self):
image_feature_pool_shape=self.image_feature_pool_shape,
coordinate_size=self.coordinate_size,
shape_size=self.shape_size,
detectron2_config_args=self.detectron2_config,
)

# use smaller resnet backbone to make tests faster
config.detectron2_config_args["MODEL.RESNETS.DEPTH"] = 18
config.detectron2_config_args["MODEL.RESNETS.RES2_OUT_CHANNELS"] = 64
config.detectron2_config_args["MODEL.RESNETS.NUM_GROUPS"] = 1

return config, input_ids, bbox, image, token_type_ids, input_mask, sequence_labels, token_labels

def create_and_check_model(
@@ -422,10 +426,6 @@ def check_hidden_states_output(inputs_dict, config, model_class):

check_hidden_states_output(inputs_dict, config, model_class)

@unittest.skip(reason="We cannot configure detectron2 to output a smaller backbone")
def test_model_is_small(self):
pass

@slow
def test_model_from_pretrained(self):
model_name = "microsoft/layoutlmv2-base-uncased"
4 changes: 0 additions & 4 deletions tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py
@@ -441,10 +441,6 @@ def test_sdpa_can_dispatch_on_flash(self):
def test_multi_gpu_data_parallel_forward(self):
pass

@unittest.skip(reason="We cannot configure to output a smaller model.")
def test_model_is_small(self):
pass


@require_torch
class Qwen2_5_VLIntegrationTest(unittest.TestCase):
4 changes: 0 additions & 4 deletions tests/models/qwen2_vl/test_modeling_qwen2_vl.py
@@ -394,10 +394,6 @@ def test_sdpa_can_dispatch_on_flash(self):
def test_multi_gpu_data_parallel_forward(self):
pass

@unittest.skip(reason="We cannot configure to output a smaller model.")
def test_model_is_small(self):
pass


@require_torch
class Qwen2VLIntegrationTest(unittest.TestCase):
11 changes: 4 additions & 7 deletions tests/models/timm_wrapper/test_modeling_timm_wrapper.py
@@ -53,14 +53,15 @@ class TimmWrapperModelTester:
def __init__(
self,
parent,
model_name="timm/resnet18.a1_in1k",
batch_size=3,
image_size=32,
num_channels=3,
is_training=True,
):
self.parent = parent
self.model_name = model_name
self.architecture = "resnet26"
# We need this to make the model smaller
self.model_args = {"channels": (16, 16, 16, 16)}
self.batch_size = batch_size
self.image_size = image_size
self.num_channels = num_channels
@@ -73,7 +74,7 @@ def prepare_config_and_inputs(self):
return config, pixel_values

def get_config(self):
return TimmWrapperConfig.from_pretrained(self.model_name)
return TimmWrapperConfig(architecture=self.architecture, model_args=self.model_args)

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
@@ -166,10 +167,6 @@ def test_initialization(self):
def test_mismatched_shapes_have_properly_initialized_weights(self):
pass

@unittest.skip(reason="Need to use a timm model and there is no tiny model available.")
def test_model_is_small(self):
pass

def test_gradient_checkpointing(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
model = TimmWrapperModel._from_config(config)
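For illustration, here is a hedged usage sketch of what the new tester setup builds (not part of the diff; it requires timm, mirrors the values in the tester above, and assumes the standard direct-from-config instantiation works for TimmWrapperModel):

```python
import torch

from transformers import TimmWrapperConfig, TimmWrapperModel

config = TimmWrapperConfig(
    architecture="resnet26",
    model_args={"channels": (16, 16, 16, 16)},  # tiny channels keep the test model small
)
model = TimmWrapperModel(config)  # builds the timm backbone with randomly initialized weights
model.eval()

# Shapes follow the tester: batch_size=3, num_channels=3, image_size=32.
pixel_values = torch.randn(3, 3, 32, 32)
with torch.no_grad():
    outputs = model(pixel_values=pixel_values)
```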
20 changes: 14 additions & 6 deletions tests/models/xcodec/test_modeling_xcodec.py
@@ -39,7 +39,7 @@
if is_torch_available():
import torch

from transformers import XcodecModel
from transformers import DacConfig, HubertConfig, XcodecModel


@require_torch
@@ -51,7 +51,7 @@ def __init__(
num_channels=1,
sample_rate=16000,
codebook_size=1024,
num_samples=400,
num_samples=256,
is_training=False,
):
self.parent = parent
@@ -61,6 +61,16 @@ def __init__(
self.codebook_size = codebook_size
self.is_training = is_training
self.num_samples = num_samples
self.acoustic_model_config = DacConfig(
decoder_hidden_size=8, encoder_hidden_size=8, codebook_size=16, downsampling_ratios=[16, 16]
)
self.semantic_model_config = HubertConfig(
hidden_size=32,
num_hidden_layers=2,
num_attention_heads=2,
intermediate_size=12,
conv_dim=(4, 4, 4, 4, 4, 4, 4),
)

def prepare_config_and_inputs(self):
config = self.get_config()
@@ -86,6 +96,8 @@ def get_config(self):
sample_rate=self.sample_rate,
audio_channels=self.num_channels,
codebook_size=self.codebook_size,
acoustic_model_config=self.acoustic_model_config,
semantic_model_config=self.semantic_model_config,
)

def create_and_check_model_forward(self, config, inputs_dict):
@@ -151,10 +163,6 @@ def test_gradient_checkpointing_backward_compatibility(self):
model = model_class(config)
self.assertTrue(model.is_gradient_checkpointing)

@unittest.skip(reason="We cannot configure to output a smaller model.")
def test_model_is_small(self):
pass

@unittest.skip(reason="The XcodecModel does not have `inputs_embeds` logics")
def test_inputs_embeds(self):
pass