Skip to content

Commit b69bde5

Browse files
mohiso22 (Mohit Soni)
authored and committed
Modeling fix (quic#605)
Signed-off-by: Mohit Soni <mohisoni@qti.qualcomm.com>
Co-authored-by: Mohit Soni <mohisoni@qti.qualcomm.com>
1 parent f82d563 commit b69bde5

File tree

2 files changed

+3
-0
lines changed

2 files changed

+3
-0
lines changed

QEfficient/transformers/models/modeling_auto.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1419,6 +1419,8 @@ def kv_offload_generate(
1419 1419          if x.startswith("past_") or x.endswith("_RetainedState")
1420 1420      ]
1421 1421  )
     1422 +if not_mllama:
     1423 +    lang_session.skip_buffers(vision_outputs.keys())
1422 1424
1423 1425  # Get first token
1424 1426  lang_inputs["input_ids"] = outputs["logits"].argmax(2)

QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,7 @@ def smart_resize(
953 953  grid_height = grid_h * grid_w
954 954  grid_width = patch_size * patch_size * temporal_patch_size * channel
955 955  vision_size = grid_height // 4
    956 +vision_size = vision_size * num_frames
956 957  grid_height = grid_height * batch_size
957 958
958 959  vision = [

0 commit comments

Comments
 (0)