Add support for BMP/PNG when loading image sequence from a folder #589

Open · wants to merge 3 commits into main
sam2/utils/misc.py (45 changes: 34 additions & 11 deletions)
@@ -195,7 +195,7 @@ def load_video_frames(
compute_device=compute_device,
)
elif is_str and os.path.isdir(video_path):
return load_video_frames_from_jpg_images(
return load_video_frames_from_images(
video_path=video_path,
image_size=image_size,
offload_video_to_cpu=offload_video_to_cpu,
@@ -220,36 +220,59 @@ def load_video_frames_from_jpg_images(
compute_device=torch.device("cuda"),
):
"""
Load the video frames from a directory of JPEG files ("<frame_index>.jpg" format).
Alias for `load_video_frames_from_images()` for backward compatibility.
"""
return load_video_frames_from_images(
video_path,
image_size,
offload_video_to_cpu,
img_mean,
img_std,
async_loading_frames,
compute_device,
)

def load_video_frames_from_images(
video_path,
image_size,
offload_video_to_cpu,
img_mean=(0.485, 0.456, 0.406),
img_std=(0.229, 0.224, 0.225),
async_loading_frames=False,
compute_device=torch.device("cuda"),
):
"""
Load the video frames from a directory of image files ("<frame_index>.[jpg|jpeg|png|bmp]" format).

The frames are resized to image_size x image_size and are loaded to GPU if
`offload_video_to_cpu` is `False` and to CPU if `offload_video_to_cpu` is `True`.

You can load a frame asynchronously by setting `async_loading_frames` to `True`.
"""
if isinstance(video_path, str) and os.path.isdir(video_path):
jpg_folder = video_path
img_folder = video_path
else:
raise NotImplementedError(
"Only JPEG frames are supported at this moment. For video files, you may use "
"ffmpeg (https://ffmpeg.org/) to extract frames into a folder of JPEG files, such as \n"
"Only JPEG/PNG/BMP frames are supported at this moment. For video files, you may use "
"ffmpeg (https://ffmpeg.org/) to extract frames into a folder of JPEG/PNG files, such as \n"
"```\n"
"ffmpeg -i <your_video>.mp4 -q:v 2 -start_number 0 <output_dir>/'%05d.jpg'\n"
"```\n"
"where `-q:v` generates high-quality JPEG frames and `-start_number 0` asks "
"ffmpeg to start the JPEG file from 00000.jpg."
)


supported_formats = set([".jpg", ".jpeg", ".png", ".bmp"])
frame_names = [
p
for p in os.listdir(jpg_folder)
if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
for p in os.listdir(img_folder)
if os.path.splitext(p)[-1].lower() in supported_formats
]
frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
num_frames = len(frame_names)
if num_frames == 0:
raise RuntimeError(f"no images found in {jpg_folder}")
img_paths = [os.path.join(jpg_folder, frame_name) for frame_name in frame_names]
raise RuntimeError(f"no images found in {img_folder}")
img_paths = [os.path.join(img_folder, frame_name) for frame_name in frame_names]
img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]

@@ -265,7 +288,7 @@ def load_video_frames_from_jpg_images(
return lazy_images, lazy_images.video_height, lazy_images.video_width

images = torch.zeros(num_frames, 3, image_size, image_size, dtype=torch.float32)
for n, img_path in enumerate(tqdm(img_paths, desc="frame loading (JPEG)")):
for n, img_path in enumerate(tqdm(img_paths, desc="frame loading")):
images[n], video_height, video_width = _load_img_as_tensor(img_path, image_size)
if not offload_video_to_cpu:
images = images.to(compute_device)
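For reference, a minimal usage sketch of the renamed helper on a folder of PNG frames. The folder path and `image_size` value below are illustrative assumptions; the signature and return values follow the diff above.

```python
# Minimal sketch (illustrative paths/values): load a folder of PNG/BMP frames
# named "<frame_index>.png" (e.g. 00000.png, 00001.png, ...) with the new helper.
import torch
from sam2.utils.misc import load_video_frames_from_images

images, video_height, video_width = load_video_frames_from_images(
    video_path="path/to/frames_dir",      # assumed folder of numbered PNG/BMP frames
    image_size=1024,                      # frames are resized to image_size x image_size
    offload_video_to_cpu=True,            # keep the frame tensor on CPU
    compute_device=torch.device("cuda"),  # only used when offload_video_to_cpu is False
)
print(images.shape)  # torch.Size([num_frames, 3, 1024, 1024])
```

Existing callers of `load_video_frames_from_jpg_images()` should be unaffected, since it now simply forwards to `load_video_frames_from_images()`. For extracting frames from a video first, something like `ffmpeg -i <your_video>.mp4 -start_number 0 <output_dir>/'%05d.png'` would produce numbered PNG frames matching the expected naming.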