1 file changed: +5 -3 lines changed
src/transformers/models/pixtral — 1 file changed: +5 -3 lines changed
Columns: original file line number | diff line number | diff line change
@@ -156,6 +156,8 @@ def __call__(
156156 ** kwargs ,
157157 )
158158
159+ patch_size = self .patch_size * self .spatial_merge_size
160+
159161 if images is not None :
160162 if is_image_or_image_url (images ):
161163 images = [images ]
@@ -172,7 +174,7 @@ def __call__(
172174 "Invalid input images. Please provide a single image, a list of images, or a list of lists of images."
173175 )
174176 images = [load_image (im ) if isinstance (im , str ) else im for im in images ]
175- image_inputs = self .image_processor (images , patch_size = self . patch_size , ** output_kwargs ["images_kwargs" ])
177+ image_inputs = self .image_processor (images , patch_size = patch_size , ** output_kwargs ["images_kwargs" ])
176178 else :
177179 image_inputs = {}
178180
@@ -192,8 +194,8 @@ def __call__(
192194 for sample in text :
193195 while self .image_token in sample :
194196 height , width = next (image_sizes )
195- num_height_tokens = height // ( self . patch_size * self . spatial_merge_size )
196- num_width_tokens = width // ( self . patch_size * self . spatial_merge_size )
197+ num_height_tokens = height // patch_size
198+ num_width_tokens = width // patch_size
197199 replace_tokens = [
198200 [self .image_token ] * num_width_tokens + [self .image_break_token ]
199201 ] * num_height_tokens
You can’t perform that action at this time.
0 commit comments