From 3e638da63571d391ef36c5906b1532555c1ca7db Mon Sep 17 00:00:00 2001
From: Anthony Tafoya <87080582+Anthony-Tafoya@users.noreply.github.com>
Date: Tue, 17 Sep 2024 08:32:09 -0700
Subject: [PATCH] Adding Random Seed for Frame Processing (#3416)

* Adding Random Seed for Frame Processing

* Added Unit Tests

* Updating Unit Tests for Ffmpeg

* Make Logs More Detailed

---------

Co-authored-by: Anthony-Tafoya
Co-authored-by: J.Y. <132313008+jb-ye@users.noreply.github.com>
---
 nerfstudio/process_data/process_data_utils.py |  16 ++-
 .../video_to_nerfstudio_dataset.py            |   9 +-
 tests/process_data/test_misc.py               | 114 +++++++++++++++++-
 3 files changed, 133 insertions(+), 6 deletions(-)

diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index b5b2391a09..3c9013abe3 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -15,6 +15,7 @@
 """Helper utils for processing data into the nerfstudio format."""
 
 import math
+import random
 import re
 import shutil
 import sys
@@ -126,6 +127,7 @@ def convert_video_to_images(
     verbose: bool = False,
     image_prefix: str = "frame_",
     keep_image_dir: bool = False,
+    random_seed: Optional[int] = None,
 ) -> Tuple[List[str], int]:
     """Converts a video into a sequence of images.
 
@@ -138,6 +140,7 @@ def convert_video_to_images(
         verbose: If True, logs the output of the command.
         image_prefix: Prefix to use for the image filenames.
         keep_image_dir: If True, don't delete the output directory if it already exists.
+        random_seed: If set, the seed used to randomly choose the frames of the video.
     Returns:
         A tuple containing summary of the conversion and the number of extracted frames.
     """
@@ -178,8 +181,6 @@ def convert_video_to_images(
             start_y = crop_factor[0]
             crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
 
-        spacing = num_frames // num_frames_target
-
         downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
         downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
         downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -196,8 +197,15 @@ def convert_video_to_images(
 
         ffmpeg_cmd += " -vsync vfr"
 
-        if spacing > 1:
-            CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
+        # Fall back to evenly spaced frame selection when no random seed is given
+        spacing = num_frames // num_frames_target
+        if random_seed is not None:
+            random.seed(random_seed)
+            frame_indices = sorted(random.sample(range(num_frames), num_frames_target))
+            select_cmd = "select='" + "+".join([f"eq(n\,{idx})" for idx in frame_indices]) + "',setpts=N/TB,"
+            CONSOLE.print(f"Extracting {num_frames_target} frames selected at random with seed {random_seed}.")
+        elif spacing > 1:
+            CONSOLE.print(f"Extracting {math.ceil(num_frames / spacing)} frames at evenly spaced intervals.")
             select_cmd = f"thumbnail={spacing},setpts=N/TB,"
         else:
             CONSOLE.print("[bold red]Can't satisfy requested number of frames. Extracting all frames.")
diff --git a/nerfstudio/process_data/video_to_nerfstudio_dataset.py b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
index af17e7d6b6..51a8a0b761 100644
--- a/nerfstudio/process_data/video_to_nerfstudio_dataset.py
+++ b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
@@ -16,7 +16,7 @@
 
 import shutil
 from dataclasses import dataclass
-from typing import Literal
+from typing import Literal, Optional
 
 from nerfstudio.process_data import equirect_utils, process_data_utils
 from nerfstudio.process_data.colmap_converter_to_nerfstudio_dataset import ColmapConverterToNerfstudioDataset
@@ -41,6 +41,10 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
     should only be used for videos."""
+    random_seed: Optional[int] = None
+    """Random seed to select video frames for training set"""
+    eval_random_seed: Optional[int] = None
+    """Random seed to select video frames for eval set"""
 
     def main(self) -> None:
         """Process video into a nerfstudio dataset."""
@@ -59,6 +63,7 @@ def main(self) -> None:
                 num_downscales=0,
                 crop_factor=(0.0, 0.0, 0.0, 0.0),
                 verbose=self.verbose,
+                random_seed=self.random_seed,
             )
         else:
             # If we're not dealing with equirects we can downscale in one step.
@@ -71,6 +76,7 @@ def main(self) -> None:
                 verbose=self.verbose,
                 image_prefix="frame_train_" if self.eval_data is not None else "frame_",
                 keep_image_dir=False,
+                random_seed=self.random_seed,
             )
             if self.eval_data is not None:
                 summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -82,6 +88,7 @@ def main(self) -> None:
                     verbose=self.verbose,
                     image_prefix="frame_eval_",
                     keep_image_dir=True,
+                    random_seed=self.eval_random_seed,
                 )
                 summary_log += summary_log_eval
                 num_extracted_frames += num_extracted_frames_eval
diff --git a/tests/process_data/test_misc.py b/tests/process_data/test_misc.py
index 1b2404b517..23fc3453ca 100644
--- a/tests/process_data/test_misc.py
+++ b/tests/process_data/test_misc.py
@@ -2,13 +2,21 @@
 Test misc data utils
 """
 
+import os
+import re
+from pathlib import Path
+from unittest import mock
+
+import cv2
 import numpy as np
+from PIL import Image
 from pyquaternion import Quaternion
 from scipy.spatial.transform import Rotation
 
 # TODO(1480) use pycolmap instead of colmap_parsing_utils
 # import pycolmap
 from nerfstudio.data.utils.colmap_parsing_utils import qvec2rotmat
+from nerfstudio.process_data.process_data_utils import convert_video_to_images
 
 
 def test_scalar_first_scalar_last_quaternions():
@@ -39,7 +47,7 @@
 
     # Expected Rotation matrix
     # fmt: off
-     R_expected = np.array(
+    R_expected = np.array(
         [
             [ 0.81379768, -0.44096961,  0.37852231],
             [ 0.46984631,  0.88256412,  0.01802831],
@@ -61,3 +69,107 @@
     # R = pycolmap.qvec_to_rotmat(wxyz)
     R = qvec2rotmat(wxyz)
     assert np.allclose(R, R_expected)
+
+
+def test_process_video_conversion_with_seed(tmp_path: Path):
+    """
+    Test convert_video_to_images by creating a mock video and ensuring correct frame extraction with seed.
+    """
+
+    # Inner functions needed for the unit tests
+    def create_mock_video(video_path: Path, frame_dir: Path, num_frames=10, frame_rate=1):
+        """Creates a mock video from a series of frames using OpenCV."""
+
+        first_frame = cv2.imread(str(frame_dir / "frame_0.png"))
+        height, width, _ = first_frame.shape
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+        out = cv2.VideoWriter(str(video_path), fourcc, frame_rate, (width, height))
+
+        for i in range(num_frames):
+            frame_path = frame_dir / f"frame_{i}.png"
+            frame = cv2.imread(str(frame_path))
+            out.write(frame)
+        out.release()
+
+    def extract_frame_numbers(ffmpeg_command: str):
+        """Extracts the frame numbers from the ffmpeg command"""
+
+        pattern = r"eq\(n\\,(\d+)\)"
+        matches = re.findall(pattern, ffmpeg_command)
+        frame_numbers = [int(match) for match in matches]
+        return frame_numbers
+
+    # Create a directory to hold the mock video and its frames
+    video_dir = tmp_path / "video"
+    video_dir.mkdir(exist_ok=True)
+
+    # Set parameters for the mock video
+    video_path = video_dir / "mock_video.mp4"
+    num_frames = 10
+    frame_height = 150
+    frame_width = 100
+    frame_rate = 1
+
+    # Create the mock video
+    for i in range(num_frames):
+        img = Image.new("RGB", (frame_width, frame_height), (0, 0, 0))
+        img.save(video_dir / f"frame_{i}.png")
+    create_mock_video(video_path, video_dir, num_frames=num_frames, frame_rate=frame_rate)
+
+    # Call convert_video_to_images
+    image_output_dir = tmp_path / "extracted_images"
+    num_frames_target = 5
+    num_downscales = 1
+    crop_factor = (0.0, 0.0, 0.0, 0.0)
+
+    # Mock missing COLMAP and ffmpeg in the dev env
+    old_path = os.environ.get("PATH", "")
+    os.environ["PATH"] = str(tmp_path / "mocked_bin") + f":{old_path}"
+    (tmp_path / "mocked_bin").mkdir()
+    (tmp_path / "mocked_bin" / "colmap").touch(mode=0o777)
+    (tmp_path / "mocked_bin" / "ffmpeg").touch(mode=0o777)
+
+    # Return value of 10 for the get_num_frames_in_video run_command call
+    with mock.patch("nerfstudio.process_data.process_data_utils.run_command", return_value="10") as mock_run_func:
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=42,
+        )
+        assert mock_run_func.call_count == 2, f"Expected 2 calls, but got {mock_run_func.call_count}"
+        first_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(first_frames) == 5, f"Expected 5 frames, but got {len(first_frames)}"
+
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=42,
+        )
+
+        assert mock_run_func.call_count == 4, f"Expected 4 total calls, but got {mock_run_func.call_count}"
+        second_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(second_frames) == 5, f"Expected 5 frames, but got {len(second_frames)}"
+        assert first_frames == second_frames
+
+        summary_log, extracted_frame_count = convert_video_to_images(
+            video_path=video_path,
+            image_dir=image_output_dir,
+            num_frames_target=num_frames_target,
+            num_downscales=num_downscales,
+            crop_factor=crop_factor,
+            verbose=False,
+            random_seed=52,
+        )
+
+        assert mock_run_func.call_count == 6, f"Expected 6 total calls, but got {mock_run_func.call_count}"
+        third_frames = extract_frame_numbers(mock_run_func.call_args[0][0])
+        assert len(third_frames) == 5, f"Expected 5 frames, but got {len(third_frames)}"
+        assert first_frames != third_frames
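
Editor's note: the snippet below is a minimal standalone sketch, not part of the patch, illustrating the seeded frame-selection logic that convert_video_to_images now uses. The values of num_frames and num_frames_target are assumed examples (in the real code they come from the video probe and the CLI); the select filter string is built the same way as in the hunk above.

    import random

    num_frames = 300        # assumed total frame count of the input video
    num_frames_target = 5   # assumed number of frames requested

    random.seed(42)         # fixing the seed makes the selection reproducible across runs
    frame_indices = sorted(random.sample(range(num_frames), num_frames_target))

    # Same ffmpeg select-filter construction as in convert_video_to_images
    select_cmd = "select='" + "+".join(f"eq(n\\,{idx})" for idx in frame_indices) + "',setpts=N/TB,"
    print(frame_indices)    # five sorted, seed-dependent indices in [0, 300)
    print(select_cmd)

Re-running the snippet with the same seed prints the same indices, which is exactly what the unit test above checks against the mocked ffmpeg command, and a different seed (e.g. 52) yields a different selection.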