Skip to content

Commit 44f5844

Browse files
committed
Fix
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent a3023e2 commit 44f5844

File tree

1 file changed

6 additions and 4 deletions (lines changed)

1 file changed

6 additions and 4 deletions (lines changed)

vllm/model_executor/models/molmo.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ class MolmoImageInputs(TypedDict):
8383
"""
8484
Starting and ending index of placeholder tokens.
8585
86-
Shape: `(2,)`
86+
Shape: `(batch_size, 2)`
8787
"""
8888

8989

@@ -1151,13 +1151,15 @@ def __call__(
11511151
idxs = inv_idxs.diff(prepend=torch.tensor([-1])).nonzero().squeeze(1)
11521152
assert len(is_image_ids) == len(idxs) == len(counts)
11531153

1154-
image_start_end = list[tuple[int, int]]()
1154+
image_start_end_lst = list[tuple[int, int]]()
11551155
for is_image_id, idx, count in zip(is_image_ids, idxs, counts):
11561156
if is_image_id:
11571157
assert input_ids[idx] in image_ids
1158-
image_start_end.append((idx, idx + count))
1158+
image_start_end_lst.append((idx, idx + count))
11591159

1160-
outputs["image_start_end"] = torch.tensor(image_start_end)
1160+
image_start_end = torch.tensor(image_start_end_lst)
1161+
assert len(image_start_end) <= 1, "Multi-image input not supported yet"
1162+
outputs["image_start_end"] = image_start_end.squeeze(0)
11611163

11621164
return BatchFeature(outputs, tensor_type=return_tensors)
11631165

0 commit comments

Comments (0)