Skip to content

Commit b689ada

Browse files
[Frontend] Enable decord to load video from base64 (#11492)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent fc60166 commit b689ada

File tree

1 file changed: +19 -20 lines changed

vllm/multimodal/utils.py

Lines changed: 19 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -125,17 +125,7 @@ async def async_fetch_image(image_url: str,
125125
return image.convert(image_mode)
126126

127127

128-
def _load_video_frames_from_bytes(b: bytes):
129-
frame = Image.open(BytesIO(b))
130-
return np.array(frame)
131-
132-
133-
def load_video_frames_from_base64(frame: Union[bytes, str]):
134-
"""Load frame from base64 format."""
135-
return _load_video_frames_from_bytes(base64.b64decode(frame))
136-
137-
138-
def _load_video_from_bytes(b: bytes, num_frames: int = 32):
128+
def _load_video_from_bytes(b: bytes, num_frames: int = 32) -> npt.NDArray:
139129
_, decord = try_import_video_packages()
140130

141131
video_path = BytesIO(b)
@@ -155,13 +145,17 @@ def _load_video_from_bytes(b: bytes, num_frames: int = 32):
155145
return frames
156146

157147

158-
def _load_video_from_data_url(video_url: str):
159-
# Only split once and assume the second part is the base64 encoded image
160-
frames_base64 = video_url.split(",")[1:]
161-
return np.stack([
162-
load_video_frames_from_base64(frame_base64)
163-
for frame_base64 in frames_base64
164-
])
148+
def _load_video_from_data_url(video_url: str) -> npt.NDArray:
149+
# Only split once and assume the second part is the base64 encoded video
150+
_, video_base64 = video_url.split(",", 1)
151+
152+
if video_url.startswith("data:video/jpeg;"):
153+
return np.stack([
154+
np.array(load_image_from_base64(frame_base64))
155+
for frame_base64 in video_base64.split(",")
156+
])
157+
158+
return load_video_from_base64(video_base64)
165159

166160

167161
def fetch_video(video_url: str, *, num_frames: int = 32) -> npt.NDArray:
@@ -342,7 +336,7 @@ def rescale_image_size(image: Image.Image,
342336
return image
343337

344338

345-
def try_import_video_packages() -> Any:
339+
def try_import_video_packages():
346340
try:
347341
import cv2
348342
import decord
@@ -384,7 +378,7 @@ def sample_frames_from_video(frames: npt.NDArray,
384378
return sampled_frames
385379

386380

387-
def encode_video_base64(frames: npt.NDArray):
381+
def encode_video_base64(frames: npt.NDArray) -> str:
388382
base64_frames = []
389383
frames_list = [frames[i] for i in range(frames.shape[0])]
390384
for frame in frames_list:
@@ -393,6 +387,11 @@ def encode_video_base64(frames: npt.NDArray):
393387
return ",".join(base64_frames)
394388

395389

390+
def load_video_from_base64(video: Union[bytes, str]) -> npt.NDArray:
391+
"""Load video from base64 format."""
392+
return _load_video_from_bytes(base64.b64decode(video))
393+
394+
396395
def resolve_visual_encoder_outputs(
397396
encoder_outputs: Union[torch.Tensor, list[torch.Tensor]],
398397
feature_sample_layers: Optional[list[int]],

0 commit comments

Comments (0)