1 parent 1767668 commit 2426221
QEfficient/transformers/models/modeling_auto.py
@@ -1412,6 +1412,8 @@ def kv_offload_generate(
                 if x.startswith("past_") or x.endswith("_RetainedState")
             ]
         )
+        if not_mllama:
+            lang_session.skip_buffers(vision_outputs.keys())
 
         # Get first token
         lang_inputs["input_ids"] = outputs["logits"].argmax(2)
QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -953,6 +953,7 @@ def smart_resize(
     grid_height = grid_h * grid_w
     grid_width = patch_size * patch_size * temporal_patch_size * channel
     vision_size = grid_height // 4
+    vision_size = vision_size * num_frames
     grid_height = grid_height * batch_size
 
     vision = [
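The second hunk scales vision_size by the number of frames, which reads as sizing the vision buffer for multi-frame (video) inputs rather than a single image. A small sketch of that arithmetic follows; the function name, the interpretation of the // 4 factor, and the example numbers are assumptions, and only the added multiplication mirrors the diff.

# Sketch of the size arithmetic the second hunk changes; everything beyond the
# added multiplication is an assumption based only on the lines in the diff.
def vision_buffer_size(grid_h: int, grid_w: int, num_frames: int) -> int:
    grid_height = grid_h * grid_w
    vision_size = grid_height // 4          # likely the 2x2 spatial patch merge
    vision_size = vision_size * num_frames  # the added line: scale by frame count
    return vision_size


# Example: a 36x36 patch grid over 4 video frames.
print(vision_buffer_size(36, 36, 4))  # (36 * 36) // 4 * 4 = 1296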