-
-
Notifications
You must be signed in to change notification settings - Fork 8.4k
Fix #19130 #19132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix #19130 #19132
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -593,21 +593,21 @@ def load_qwen_vl_chat(question: str, image_urls: list[str]) -> ModelRequestData: | |
|
||
def load_qwen2_vl(question: str, image_urls: list[str]) -> ModelRequestData: | ||
try: | ||
from qwen_vl_utils import process_vision_info | ||
from qwen_vl_utils import smart_resize | ||
except ModuleNotFoundError: | ||
print( | ||
"WARNING: `qwen-vl-utils` not installed, input images will not " | ||
"be automatically resized. You can enable this functionality by " | ||
"`pip install qwen-vl-utils`." | ||
) | ||
process_vision_info = None | ||
smart_resize = None | ||
|
||
model_name = "Qwen/Qwen2-VL-7B-Instruct" | ||
|
||
# Tested on L40 | ||
engine_args = EngineArgs( | ||
model=model_name, | ||
max_model_len=32768 if process_vision_info is None else 4096, | ||
max_model_len=32768 if smart_resize is None else 4096, | ||
max_num_seqs=5, | ||
limit_mm_per_prompt={"image": len(image_urls)}, | ||
) | ||
|
@@ -630,10 +630,18 @@ def load_qwen2_vl(question: str, image_urls: list[str]) -> ModelRequestData: | |
messages, tokenize=False, add_generation_prompt=True | ||
) | ||
|
||
if process_vision_info is None: | ||
if smart_resize is None: | ||
image_data = [fetch_image(url) for url in image_urls] | ||
else: | ||
image_data, _ = process_vision_info(messages) | ||
|
||
def post_process_image(image: Image) -> Image: | ||
width, height = image.size | ||
resized_height, resized_width = smart_resize( | ||
height, width, max_pixels=1024 * 28 * 28 | ||
) | ||
return image.resize((resized_width, resized_height)) | ||
|
||
image_data = [post_process_image(fetch_image(url)) for url in image_urls] | ||
Comment on lines
+636
to
+644
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The original code called `process_vision_info(messages)`. Does this new approach fully replicate the behavior of the original `process_vision_info` call? |
||
|
||
return ModelRequestData( | ||
engine_args=engine_args, | ||
|
@@ -644,20 +652,20 @@ def load_qwen2_vl(question: str, image_urls: list[str]) -> ModelRequestData: | |
|
||
def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData: | ||
try: | ||
from qwen_vl_utils import process_vision_info | ||
from qwen_vl_utils import smart_resize | ||
except ModuleNotFoundError: | ||
print( | ||
"WARNING: `qwen-vl-utils` not installed, input images will not " | ||
"be automatically resized. You can enable this functionality by " | ||
"`pip install qwen-vl-utils`." | ||
) | ||
process_vision_info = None | ||
smart_resize = None | ||
|
||
model_name = "Qwen/Qwen2.5-VL-3B-Instruct" | ||
|
||
engine_args = EngineArgs( | ||
model=model_name, | ||
max_model_len=32768 if process_vision_info is None else 4096, | ||
max_model_len=32768 if smart_resize is None else 4096, | ||
max_num_seqs=5, | ||
limit_mm_per_prompt={"image": len(image_urls)}, | ||
) | ||
|
@@ -680,10 +688,18 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData: | |
messages, tokenize=False, add_generation_prompt=True | ||
) | ||
|
||
if process_vision_info is None: | ||
if smart_resize is None: | ||
image_data = [fetch_image(url) for url in image_urls] | ||
else: | ||
image_data, _ = process_vision_info(messages, return_video_kwargs=False) | ||
|
||
def post_process_image(image: Image) -> Image: | ||
width, height = image.size | ||
resized_height, resized_width = smart_resize( | ||
height, width, max_pixels=1024 * 28 * 28 | ||
Comment on lines
+697
to
+698
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
) | ||
return image.resize((resized_width, resized_height)) | ||
|
||
image_data = [post_process_image(fetch_image(url)) for url in image_urls] | ||
princepride marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return ModelRequestData( | ||
engine_args=engine_args, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The value
1024 * 28 * 28
is hardcoded here. Is this value derived from the model's specific requirements or the `smart_resize`
function's intended usage? Consider making this a named constant with a clear explanation or deriving it dynamically if possible to improve maintainability and clarity.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's just example code. The original code's `max_pixels` was also hardcoded inside the function implementation, but that pixel count is too large and would exceed the model's maximum length, so I adjusted it to a smaller value.