Skip to content

Commit 2f81d58

Browse files
yonigozlan authored and ita.zaporozhets@huggingface.co committed
Remove null values from fast image processors dict (#42780)
* remove null values from saved preprocessor file for fast image processor
* preserve explicit None values != class default
* Fix flava test
* extend to video processor
1 parent a7b1988 commit 2f81d58

File tree

6 files changed

+95
-17
lines changed

6 files changed

+95
-17
lines changed

src/transformers/image_processing_utils_fast.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,18 @@ def _preprocess(
936936

937937
def to_dict(self):
938938
encoder_dict = super().to_dict()
939-
encoder_dict.pop("_valid_processor_keys", None)
940-
encoder_dict.pop("_valid_kwargs_names", None)
941-
return encoder_dict
939+
940+
# Filter out None values that are class defaults, but preserve explicitly set None values
941+
filtered_dict = {}
942+
for key, value in encoder_dict.items():
943+
if value is None:
944+
class_default = getattr(type(self), key, "NOT_FOUND")
945+
# Keep None if user explicitly set it (class default is non-None)
946+
if class_default != "NOT_FOUND" and class_default is not None:
947+
filtered_dict[key] = value
948+
else:
949+
filtered_dict[key] = value
950+
951+
filtered_dict.pop("_valid_processor_keys", None)
952+
filtered_dict.pop("_valid_kwargs_names", None)
953+
return filtered_dict

src/transformers/video_processing_utils.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -770,11 +770,21 @@ def to_dict(self) -> dict[str, Any]:
770770
`dict[str, Any]`: Dictionary of all the attributes that make up this video processor instance.
771771
"""
772772
output = deepcopy(self.__dict__)
773-
output.pop("model_valid_processing_keys", None)
774-
output.pop("_valid_kwargs_names", None)
775-
output["video_processor_type"] = self.__class__.__name__
773+
filtered_dict = {}
774+
for key, value in output.items():
775+
if value is None:
776+
class_default = getattr(type(self), key, "NOT_FOUND")
777+
# Keep None if user explicitly set it (class default is non-None)
778+
if class_default != "NOT_FOUND" and class_default is not None:
779+
filtered_dict[key] = value
780+
else:
781+
filtered_dict[key] = value
782+
783+
filtered_dict.pop("model_valid_processing_keys", None)
784+
filtered_dict.pop("_valid_kwargs_names", None)
785+
filtered_dict["video_processor_type"] = self.__class__.__name__
776786

777-
return output
787+
return filtered_dict
778788

779789
def to_json_string(self) -> str:
780790
"""

tests/models/flava/test_image_processing_flava.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ def __init__(
6666
image_std=FLAVA_IMAGE_STD,
6767
input_size_patches=14,
6868
total_mask_patches=75,
69-
mask_group_max_patches=None,
7069
mask_group_min_patches=16,
7170
mask_group_min_aspect_ratio=0.3,
7271
mask_group_max_aspect_ratio=None,
@@ -103,7 +102,6 @@ def __init__(
103102

104103
self.input_size_patches = input_size_patches
105104
self.total_mask_patches = total_mask_patches
106-
self.mask_group_max_patches = mask_group_max_patches
107105
self.mask_group_min_patches = mask_group_min_patches
108106
self.mask_group_min_aspect_ratio = mask_group_min_aspect_ratio
109107
self.mask_group_max_aspect_ratio = mask_group_max_aspect_ratio
@@ -133,7 +131,6 @@ def prepare_image_processor_dict(self):
133131
"crop_size": self.crop_size,
134132
"input_size_patches": self.input_size_patches,
135133
"total_mask_patches": self.total_mask_patches,
136-
"mask_group_max_patches": self.mask_group_max_patches,
137134
"mask_group_min_patches": self.mask_group_min_patches,
138135
"mask_group_min_aspect_ratio": self.mask_group_min_aspect_ratio,
139136
"mask_group_max_aspect_ratio": self.mask_group_min_aspect_ratio,

tests/models/mask2former/test_image_processing_mask2former.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ def __init__(
5757
num_labels=10,
5858
do_reduce_labels=True,
5959
ignore_index=255,
60-
pad_size=None,
6160
):
6261
self.parent = parent
6362
self.batch_size = batch_size
@@ -70,7 +69,6 @@ def __init__(
7069
self.image_mean = image_mean
7170
self.image_std = image_std
7271
self.size_divisor = 0
73-
self.pad_size = pad_size
7472
# for the post_process_functions
7573
self.batch_size = 2
7674
self.num_queries = 3
@@ -92,7 +90,6 @@ def prepare_image_processor_dict(self):
9290
"num_labels": self.num_labels,
9391
"do_reduce_labels": self.do_reduce_labels,
9492
"ignore_index": self.ignore_index,
95-
"pad_size": self.pad_size,
9693
}
9794

9895
def get_expected_values(self, image_inputs, batched=False):

tests/test_image_processing_common.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,10 @@ def test_save_load_fast_slow(self):
341341
}
342342
dict_fast_0 = {key: dict_fast_0[key] for key in set(dict_fast_0) & set(dict_fast_1)}
343343
dict_fast_1 = {key: dict_fast_1[key] for key in set(dict_fast_0) & set(dict_fast_1)}
344-
# check that all additional keys are None, except for `default_to_square` and `data_format` which are only set in fast processors
344+
# Fast processors filter None values from to_dict(), so differences should only be special keys
345345
self.assertTrue(
346-
all(value is None for key, value in difference.items() if key not in ["default_to_square", "data_format"])
346+
all(key in ["default_to_square", "data_format"] for key in difference.keys()),
347+
f"Fast processors should only differ in special keys, found: {list(difference.keys())}",
347348
)
348349
# check that the remaining keys are the same
349350
self.assertEqual(dict_fast_0, dict_fast_1)
@@ -391,9 +392,10 @@ def test_save_load_fast_slow_auto(self):
391392
}
392393
dict_fast_0 = {key: dict_fast_0[key] for key in set(dict_fast_0) & set(dict_fast_1)}
393394
dict_fast_1 = {key: dict_fast_1[key] for key in set(dict_fast_0) & set(dict_fast_1)}
394-
# check that all additional keys are None, except for `default_to_square` and `data_format` which are only set in fast processors
395+
# Fast processors filter None values from to_dict(), so differences should only be special keys
395396
self.assertTrue(
396-
all(value is None for key, value in difference.items() if key not in ["default_to_square", "data_format"])
397+
all(key in ["default_to_square", "data_format"] for key in difference.keys()),
398+
f"Fast processors should only differ in special keys, found: {list(difference.keys())}",
397399
)
398400
# check that the remaining keys are the same
399401
self.assertEqual(dict_fast_0, dict_fast_1)
@@ -693,6 +695,37 @@ def _is_old_model_by_commit_date(model_type, date_cutoff=(2025, 9, 1)):
693695
f"a fast image processor implementation. Please implement the corresponding fast processor.",
694696
)
695697

698+
def test_fast_image_processor_explicit_none_preserved(self):
699+
"""Test that explicitly setting an attribute to None is preserved through save/load."""
700+
if self.fast_image_processing_class is None:
701+
self.skipTest("Skipping test as fast image processor is not defined")
702+
703+
# Find an attribute with a non-None class default to test explicit None override
704+
test_attr = None
705+
for attr in ["do_resize", "do_rescale", "do_normalize"]:
706+
if getattr(self.fast_image_processing_class, attr, None) is not None:
707+
test_attr = attr
708+
break
709+
710+
if test_attr is None:
711+
self.skipTest("Could not find a suitable attribute to test")
712+
713+
# Create processor with explicit None (override the attribute)
714+
kwargs = self.image_processor_dict.copy()
715+
kwargs[test_attr] = None
716+
image_processor = self.fast_image_processing_class(**kwargs)
717+
718+
# Verify it's in to_dict() as None (not filtered out)
719+
self.assertIn(test_attr, image_processor.to_dict())
720+
self.assertIsNone(image_processor.to_dict()[test_attr])
721+
722+
# Verify explicit None survives save/load cycle
723+
with tempfile.TemporaryDirectory() as tmpdirname:
724+
image_processor.save_pretrained(tmpdirname)
725+
reloaded = self.fast_image_processing_class.from_pretrained(tmpdirname)
726+
727+
self.assertIsNone(getattr(reloaded, test_attr), f"Explicit None for {test_attr} was lost after reload")
728+
696729

697730
class AnnotationFormatTestMixin:
698731
# this mixin adds a test to assert that usages of the

tests/test_video_processing_common.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,35 @@ def test_init_without_params(self):
167167
video_processor = video_processing_class()
168168
self.assertIsNotNone(video_processor)
169169

170+
def test_video_processor_explicit_none_preserved(self):
171+
"""Test that explicitly setting an attribute to None is preserved through save/load."""
172+
173+
# Find an attribute with a non-None class default to test explicit None override
174+
test_attr = None
175+
for attr in ["do_resize", "do_rescale", "do_normalize"]:
176+
if getattr(self.fast_video_processing_class, attr, None) is not None:
177+
test_attr = attr
178+
break
179+
180+
if test_attr is None:
181+
self.skipTest("Could not find a suitable attribute to test")
182+
183+
# Create processor with explicit None (override the attribute)
184+
kwargs = self.video_processor_dict.copy()
185+
kwargs[test_attr] = None
186+
video_processor = self.fast_video_processing_class(**kwargs)
187+
188+
# Verify it's in to_dict() as None (not filtered out)
189+
self.assertIn(test_attr, video_processor.to_dict())
190+
self.assertIsNone(video_processor.to_dict()[test_attr])
191+
192+
# Verify explicit None survives save/load cycle
193+
with tempfile.TemporaryDirectory() as tmpdirname:
194+
video_processor.save_pretrained(tmpdirname)
195+
reloaded = self.fast_video_processing_class.from_pretrained(tmpdirname)
196+
197+
self.assertIsNone(getattr(reloaded, test_attr), f"Explicit None for {test_attr} was lost after reload")
198+
170199
@slow
171200
@require_torch_accelerator
172201
@require_vision

0 commit comments

Comments
 (0)