Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/transformers/models/idefics3/processing_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,8 @@ def __call__(
f"The number of images in the text {n_images_in_text} and images {n_images_in_images} should be the same."
)

image_rows = inputs.pop("rows", [[0] * len(text)])
image_cols = inputs.pop("cols", [[0] * len(text)])
image_rows = inputs.pop("rows", [[0] * n_images for n_images in n_images_in_text])
image_cols = inputs.pop("cols", [[0] * n_images for n_images in n_images_in_text])

fake_image_token = self.fake_image_token
image_token = self.image_token
Expand Down
7 changes: 5 additions & 2 deletions src/transformers/models/smolvlm/processing_smolvlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,6 @@ def __init__(

def expand_text_with_image_tokens(self, text, image_rows, image_cols):
prompt_strings = []
image_rows = image_rows if image_rows is not None else [[0] * len(text)]
image_cols = image_cols if image_cols is not None else [[0] * len(text)]
for sample, sample_rows, sample_cols in zip(text, image_rows, image_cols):
# Replace the image token with fake tokens around the expanded image token sequence of length `image_seq_len`
image_prompt_strings = []
Expand Down Expand Up @@ -330,6 +328,11 @@ def __call__(
raise ValueError(
f"The number of images in the text {n_images_in_text} and images {n_images_in_images} should be the same."
)
# Set default values for image_rows and image_cols if not provided
if image_rows is None:
image_rows = [[0] * n_images for n_images in n_images_in_text]
if image_cols is None:
image_cols = [[0] * n_images for n_images in n_images_in_text]
text = self.expand_text_with_image_tokens(text, image_rows=image_rows, image_cols=image_cols)

elif videos is not None:
Expand Down