Your current environment
    outputs = self.llm.generate(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/utils.py", line 1086, in inner
    return fn(*args, **kwargs)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 461, in generate
    self._validate_and_add_requests(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 1323, in _validate_and_add_requests
    self._add_request(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 1341, in _add_request
    self.llm_engine.add_request(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/utils.py", line 1086, in inner
    return fn(*args, **kwargs)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 751, in add_request
    preprocessed_inputs = self.input_preprocessor.preprocess(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/inputs/preprocess.py", line 676, in preprocess
    return self._process_decoder_only_prompt(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/inputs/preprocess.py", line 625, in _process_decoder_only_prompt
    prompt_comps = self._prompt_to_llm_inputs(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/inputs/preprocess.py", line 354, in _prompt_to_llm_inputs
    return self._process_multimodal(
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/inputs/preprocess.py", line 268, in _process_multimodal
    return mm_processor.apply(prompt, mm_data, mm_processor_kwargs)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/model_executor/models/minicpmv.py", line 812, in apply
    result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 1200, in apply
    mm_items = self._to_mm_items(mm_data)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/multimodal/processing.py", line 749, in _to_mm_items
    mm_items = self.data_parser.parse_mm_data(mm_data)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/multimodal/parse.py", line 366, in parse_mm_data
    mm_items[k] = subparsers[k](v)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/model_executor/models/minicpmv.py", line 321, in _parse_image_data
    return MiniCPMVImageEmbeddingItems(data)
  File "/data/DevEnvironments/miniconda/envs/minicpmo/lib/python3.10/site-packages/vllm/model_executor/models/minicpmv.py", line 148, in __init__
    raise ValueError("In correct type of image_embeds",
ValueError: ('In correct type of image_embeds', 'Got type: None')
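From the traceback, the dict I pass under the "image" key of multi_modal_data is routed into the image-embeddings branch (MiniCPMVImageEmbeddingItems), which then looks for an image_embeds entry and finds None. For reference, this is roughly the shape of my failing input versus a plain-image input; frames stands for a list of PIL images, and the exact keys accepted by the embeddings path are my guess from the error message, not verified against the vLLM source:

from PIL import Image

frames = [Image.new("RGB", (448, 448))]  # placeholder frames, for illustration only

# what my script passes (a dict of options) -- this hits the embeddings branch and fails:
failing_mm_data = {
    "image": {"images": frames, "use_image_id": False, "max_slice_nums": 2}
}

# what the parser appears to expect for raw images -- a PIL.Image or a list of them:
plain_mm_data = {"image": frames}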
How would you like to use vllm
I want to run inference with the model /data/models/MiniCPM/MiniCPM-o-2_6. There is a problem when integrating it with vLLM; here is the code.
import os
import uuid
import uvicorn
import traceback
from transformers import AutoTokenizer
from decord import VideoReader, cpu
from PIL import Image
from vllm import LLM, SamplingParams
from typing import Optional
from pydantic import BaseModel
from fastapi import FastAPI, HTTPException, File, UploadFile
from fastapi.responses import JSONResponse
from minio import Minio
MAX_NUM_FRAMES = 10
def encode_video(filepath, frames_num=MAX_NUM_FRAMES):
    def uniform_sample(l, n):
        gap = len(l) / n
        idxs = [int(i * gap + gap / 2) for i in range(n)]
        return [l[i] for i in idxs]

    vr = VideoReader(filepath, ctx=cpu(0))
    sample_fps = round(vr.get_avg_fps() / 1)  # FPS
    frame_idx = [i for i in range(0, len(vr), sample_fps)]
    if len(frame_idx) > frames_num:
        frame_idx = uniform_sample(frame_idx, frames_num)
    video = vr.get_batch(frame_idx).asnumpy()
    video = [Image.fromarray(v.astype("uint8")) for v in video]
    return video
class MiniCPMOServer:
    def __init__(self, params):
        # single GPU
        self.llm = LLM(
            model=params["MODEL_NAME"],
            max_model_len=params["max_model_len"],
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            enforce_eager=True,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            params["MODEL_NAME"], trust_remote_code=True
        )
        self.stop_tokens = ["<|im_end|>", "<|endoftext|>"]
        self.stop_token_ids = [
            self.tokenizer.convert_tokens_to_ids(i) for i in self.stop_tokens
        ]
        self.sampling_params = SamplingParams(
            stop_token_ids=self.stop_token_ids,
            # use_beam_search=False,
            temperature=0.4,
            top_p=0.8,
            top_k=100,
            max_tokens=2048,
        )

    def infer_video(self, video_path, query, frames_num=MAX_NUM_FRAMES):
        frames = encode_video(video_path, frames_num=frames_num)
        messages = [
            {
                "role": "user",
                "content": "".join(["(<image>./</image>)"] * len(frames))
                + f"\n{query}",
            }
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        outputs = self.llm.generate(
            {
                "prompt": prompt,
                "multi_modal_data": {
                    "image": {
                        "images": frames,
                        "use_image_id": False,
                        "max_slice_nums": 1 if len(frames) > 16 else 2,
                    }
                },
            },
            sampling_params=self.sampling_params,
        )
        response = outputs[0].outputs[0].text
        return response

    def infer_image(self, image_paths, query):
        # assume for now that image_paths contains only a single path
        pass
        # image = PIL.Image.open(image_paths)
        # outputs = self.llm.generate({})
params = {
    # "MODEL_NAME": "/data/home/Taom/projects/VideoUnderstand/scripts/finetune_sh/output/MiniCPM-V-2_6/v2-20241213-182005/checkpoint-870-merged",
    "MODEL_NAME": "/data/models/MiniCPM/MiniCPM-o-2_6",
    "max_model_len": 2048,
    "max_tokens": 2048,
}
model = MiniCPMOServer(params)
# app, minio_client, MINIO_DIR, RemoteInferMinioRequest and
# VideoDescriptionResponse are defined in parts of the script not shown here.
@app.post("/v1/MiniCPMV/remote_infer_minio", response_model=VideoDescriptionResponse)
async def remote_infer_minio(request: RemoteInferMinioRequest):
    try:
        # (1) download the video from MinIO
        print(request)
        local_video_path = f"{MINIO_DIR}/{request.object_name}"
        os.makedirs(os.path.dirname(local_video_path), exist_ok=True)
        minio_client.fget_object(
            request.bucket_name, request.object_name, local_video_path
        )
        # (2) run inference
        response = model.infer_video(
            video_path=local_video_path,
            query=request.query,
            frames_num=request.frames_num,
        )
        # (3) delete the video file
        os.remove(local_video_path)
        # (4) return the result
        return VideoDescriptionResponse(feature_description=response)
    except Exception as e:
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=10100)
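For what it's worth, the variant below is what I would try next, based on the vLLM multimodal-inputs documentation: pass the PIL frames directly under "image", move use_image_id / max_slice_nums into mm_processor_kwargs, and allow several images per prompt via limit_mm_per_prompt. I have not verified that the MiniCPM-o 2.6 processor accepts these kwargs, so treat this as a sketch rather than a confirmed fix:

# in __init__: allow multiple image frames per request
self.llm = LLM(
    model=params["MODEL_NAME"],
    max_model_len=params["max_model_len"],
    gpu_memory_utilization=0.95,
    trust_remote_code=True,
    enforce_eager=True,
    limit_mm_per_prompt={"image": MAX_NUM_FRAMES},
)

# in infer_video: pass the PIL frames directly and move the processor options
# into mm_processor_kwargs (unverified that MiniCPM-o accepts these kwargs)
outputs = self.llm.generate(
    {
        "prompt": prompt,
        "multi_modal_data": {"image": frames},
        "mm_processor_kwargs": {
            "use_image_id": False,
            "max_slice_nums": 1 if len(frames) > 16 else 2,
        },
    },
    sampling_params=self.sampling_params,
)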