Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Random Seed for Frame Processing #3416

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions nerfstudio/process_data/process_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""Helper utils for processing data into the nerfstudio format."""

import math
import random
import re
import shutil
import sys
Expand Down Expand Up @@ -126,6 +127,7 @@ def convert_video_to_images(
verbose: bool = False,
image_prefix: str = "frame_",
keep_image_dir: bool = False,
random_seed: Optional[int] = None,
) -> Tuple[List[str], int]:
"""Converts a video into a sequence of images.

Expand All @@ -138,6 +140,7 @@ def convert_video_to_images(
verbose: If True, logs the output of the command.
image_prefix: Prefix to use for the image filenames.
keep_image_dir: If True, don't delete the output directory if it already exists.
random_seed: If set, the seed used to choose the frames of the video
Returns:
A tuple containing summary of the conversion and the number of extracted frames.
"""
Expand Down Expand Up @@ -178,8 +181,6 @@ def convert_video_to_images(
start_y = crop_factor[0]
crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"

spacing = num_frames // num_frames_target

downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
Expand All @@ -196,8 +197,15 @@ def convert_video_to_images(

ffmpeg_cmd += " -vsync vfr"

if spacing > 1:
CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
# Evenly distribute frame selection if random seed does not exist
spacing = num_frames // num_frames_target
if random_seed:
random.seed(random_seed)
frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
select_cmd = "select='" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
CONSOLE.print(f"Extracting {num_frames_target} frames using seed {random_seed} random selection.")
elif spacing > 1:
CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames in evenly spaced intervals")
select_cmd = f"thumbnail={spacing},setpts=N/TB,"
else:
CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")
Expand Down
9 changes: 8 additions & 1 deletion nerfstudio/process_data/video_to_nerfstudio_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import shutil
from dataclasses import dataclass
from typing import Literal
from typing import Literal, Optional

from nerfstudio.process_data import equirect_utils, process_data_utils
from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
Expand All @@ -41,6 +41,10 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
"""Feature matching method to use. Vocab tree is recommended for a balance of speed
and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
should only be used for videos."""
random_seed: Optional[int] = None
"""Random seed to select video frames for training set"""
eval_random_seed: Optional[int] = None
"""Random seed to select video frames for eval set"""

def main(self) -> None:
"""Process video into a nerfstudio dataset."""
Expand All @@ -59,6 +63,7 @@ def main(self) -> None:
num_downscales=0,
crop_factor=(0.0, 0.0, 0.0, 0.0),
verbose=self.verbose,
random_seed=self.random_seed,
)
else:
# If we're not dealing with equirects we can downscale in one step.
Expand All @@ -71,6 +76,7 @@ def main(self) -> None:
verbose=self.verbose,
image_prefix="frame_train_" if self.eval_data is not None else "frame_",
keep_image_dir=False,
random_seed=self.random_seed,
)
if self.eval_data is not None:
summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
Expand All @@ -82,6 +88,7 @@ def main(self) -> None:
verbose=self.verbose,
image_prefix="frame_eval_",
keep_image_dir=True,
random_seed=self.eval_random_seed,
)
summary_log += summary_log_eval
num_extracted_frames += num_extracted_frames_eval
Expand Down
114 changes: 113 additions & 1 deletion tests/process_data/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@
Test misc data utils
"""

import os
import re
from pathlib import Path
from unittest import mock

import cv2
import numpy as np
from PIL import Image
from pyquaternion import Quaternion
from scipy.spatial.transform import Rotation

# TODO(1480) use pycolmap instead of colmap_parsing_utils
# import pycolmap
from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat
from nerfstudio.process_data.process_data_utils import convert_video_to_images


def test_scalar_first_scalar_last_quaternions():
Expand Down Expand Up @@ -39,7 +47,7 @@ def test_scalar_first_scalar_last_quaternions():

# Expected Rotation matrix
# fmt: off
R_expected = np.array(
R_expected = np.array(
[
[ 0.81379768, -0.44096961, 0.37852231],
[ 0.46984631, 0.88256412, 0.01802831],
Expand All @@ -61,3 +69,107 @@ def test_scalar_first_scalar_last_quaternions():
# R = pycolmap.qvec_to_rotmat(wxyz)
R = qvec2rotmat(wxyz)
assert np.allclose(R, R_expected)


def test_process_video_conversion_with_seed(tmp_path: Path):
    """
    Test convert_video_to_images by creating a mock video and ensuring correct frame extraction with seed.

    Verifies three properties of seeded frame selection:
      * a fixed ``random_seed`` selects exactly ``num_frames_target`` frames,
      * rerunning with the same seed reproduces the identical selection,
      * a different seed yields a different selection.
    """

    # Inner functions needed for the unit tests
    def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1):
        """Creates a mock video from a series of frames using OpenCV."""

        first_frame = cv2.imread(str(frame_dir / "frame_0.png"))
        height, width, _ = first_frame.shape
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height))

        for i in range(num_frames):
            frame_path = frame_dir / f"frame_{i}.png"
            frame = cv2.imread(str(frame_path))
            out.write(frame)
        out.release()

    def extract_frame_numbers(ffmpeg_command: str):
        """Extracts the frame numbers from the ffmpeg select filter in the command."""

        pattern = r"eq\(n\\,(\d+)\)"
        matches = re.findall(pattern, ffmpeg_command)
        frame_numbers = [int(match) for match in matches]
        return frame_numbers

    # Create a video directory with path video
    video_dir = tmp_path / "video"
    video_dir.mkdir(exist_ok=True)

    # Set parameters for mock video
    video_path = video_dir / "mock_video.mp4"
    num_frames = 10
    frame_height = 150
    frame_width = 100
    frame_rate = 1

    # Create the mock video from a series of black frames
    for i in range(num_frames):
        img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0))
        img.save(video_dir / f"frame_{i}.png")
    create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate)

    # Call convert_video_to_images
    image_output_dir = tmp_path / "extracted_images"
    num_frames_target = 5
    num_downscales = 1
    crop_factor = (0.0, 0.0, 0.0, 0.0)

    # Mock missing COLMAP and ffmpeg in the dev env by shadowing them with empty
    # executables at the front of PATH. Use os.pathsep so this works beyond POSIX,
    # and restore PATH in a finally block so the change cannot leak into other tests.
    old_path = os.environ.get("PATH", "")
    os.environ["PATH"] = str(tmp_path / "mocked_bin") + os.pathsep + old_path
    (tmp_path / "mocked_bin").mkdir()
    (tmp_path / "mocked_bin" / "colmap").touch(mode=0o777)
    (tmp_path / "mocked_bin" / "ffmpeg").touch(mode=0o777)

    try:
        # Return value of 10 for the get_num_frames_in_video run_command call
        with mock.patch("nerfstudio.process_data.process_data_utils.run_command", return_value="10") as mock_run_func:
            summary_log, extracted_frame_count = convert_video_to_images(
                video_path=video_path,
                image_dir=image_output_dir,
                num_frames_target=num_frames_target,
                num_downscales=num_downscales,
                crop_factor=crop_factor,
                verbose=False,
                random_seed=42,
            )
            assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}"
            first_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
            assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"

            # Same seed again: the selected frame indices must be reproducible.
            summary_log, extracted_frame_count = convert_video_to_images(
                video_path=video_path,
                image_dir=image_output_dir,
                num_frames_target=num_frames_target,
                num_downscales=num_downscales,
                crop_factor=crop_factor,
                verbose=False,
                random_seed=42,
            )

            assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}"
            second_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
            assert len(second_frames) == 5, f"Expected 5 frames, but got {len(second_frames)}"
            assert first_frames == second_frames

            # Different seed: the selection should differ from the first run.
            summary_log, extracted_frame_count = convert_video_to_images(
                video_path=video_path,
                image_dir=image_output_dir,
                num_frames_target=num_frames_target,
                num_downscales=num_downscales,
                crop_factor=crop_factor,
                verbose=False,
                random_seed=52,
            )

            assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}"
            third_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
            assert len(third_frames) == 5, f"Expected 5 frames, but got {len(third_frames)}"
            assert first_frames != third_frames
    finally:
        # Undo the PATH mutation regardless of test outcome.
        os.environ["PATH"] = old_path
Loading