Skip to content

Commit f7ba365

Browse files
mgoin authored and ArthurZucker committed
Fix PixtralProcessor patch_size when spatial_merge_size is used (#37019)
1 parent b258dc3 commit f7ba365

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

src/transformers/models/pixtral/processing_pixtral.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,8 @@ def __call__(
156156
**kwargs,
157157
)
158158

159+
patch_size = self.patch_size * self.spatial_merge_size
160+
159161
if images is not None:
160162
if is_image_or_image_url(images):
161163
images = [images]
@@ -172,7 +174,7 @@ def __call__(
172174
"Invalid input images. Please provide a single image, a list of images, or a list of lists of images."
173175
)
174176
images = [load_image(im) if isinstance(im, str) else im for im in images]
175-
image_inputs = self.image_processor(images, patch_size=self.patch_size, **output_kwargs["images_kwargs"])
177+
image_inputs = self.image_processor(images, patch_size=patch_size, **output_kwargs["images_kwargs"])
176178
else:
177179
image_inputs = {}
178180

@@ -192,8 +194,8 @@ def __call__(
192194
for sample in text:
193195
while self.image_token in sample:
194196
height, width = next(image_sizes)
195-
num_height_tokens = height // (self.patch_size * self.spatial_merge_size)
196-
num_width_tokens = width // (self.patch_size * self.spatial_merge_size)
197+
num_height_tokens = height // patch_size
198+
num_width_tokens = width // patch_size
197199
replace_tokens = [
198200
[self.image_token] * num_width_tokens + [self.image_break_token]
199201
] * num_height_tokens

0 commit comments

Comments
 (0)