diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index b5b2391a090..fb2914711bf 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -15,6 +15,7 @@
 """Helper utils for processing data into the nerfstudio format."""
 
 import math
+import random
 import re
 import shutil
 import sys
@@ -126,6 +127,7 @@ def convert_video_to_images(
     verbose: bool = False,
     image_prefix: str = "frame_",
     keep_image_dir: bool = False,
+    random_seed: Optional[int] = None,
 ) -> Tuple[List[str], int]:
     """Converts a video into a sequence of images.
 
@@ -178,8 +180,6 @@ def convert_video_to_images(
         start_y = crop_factor[0]
         crop_cmd = f"crop=w=iw*{width}:h=ih*{height}:x=iw*{start_x}:y=ih*{start_y},"
 
-    spacing = num_frames // num_frames_target
-
     downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}[out{i}]" for i in range(num_downscales + 1)]
     downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
     downscale_paths = [downscale_dirs[i] / f"{image_prefix}%05d.png" for i in range(num_downscales + 1)]
@@ -196,7 +196,14 @@ def convert_video_to_images(
 
     ffmpeg_cmd += " -vsync vfr"
 
-    if spacing > 1:
+    # Frame selection: seeded random sample when random_seed is given (0 is a valid seed,
+    # so test against None, not truthiness), otherwise evenly spaced frames.
+    spacing = num_frames // num_frames_target
+    if random_seed is not None:
+        rng = random.Random(random_seed)  # local RNG: do not clobber the global random state
+        frame_indices = sorted(rng.sample(range(num_frames), min(num_frames_target, num_frames)))
+        select_cmd = "select='" + "+".join(["eq(n\\," + str(idx) + ")" for idx in frame_indices]) + "',setpts=N/TB,"
+        CONSOLE.print(f"Extracting {len(frame_indices)} frames using seed-based random selection.")
+    elif spacing > 1:
         CONSOLE.print("Number of frames to extract:", math.ceil(num_frames / spacing))
         select_cmd = f"thumbnail={spacing},setpts=N/TB,"
     else:
diff --git a/nerfstudio/process_data/video_to_nerfstudio_dataset.py b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
index af17e7d6b6b..f7415bb6522 100644
--- a/nerfstudio/process_data/video_to_nerfstudio_dataset.py
+++ b/nerfstudio/process_data/video_to_nerfstudio_dataset.py
@@ -41,6 +41,10 @@ class VideoToNerfstudioDataset(ColmapConverterToNerfstudioDataset):
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but should
     only be used for videos."""
+    random_seed: Optional[int] = None
+    """Random seed to select video frames"""
+    eval_random_seed: Optional[int] = None
+    """Random seed to select video frames for eval set"""
 
     def main(self) -> None:
         """Process video into a nerfstudio dataset."""
@@ -59,6 +63,7 @@ def main(self) -> None:
                 num_downscales=0,
                 crop_factor=(0.0, 0.0, 0.0, 0.0),
                 verbose=self.verbose,
+                random_seed=self.random_seed,
             )
         else:
             # If we're not dealing with equirects we can downscale in one step.
@@ -71,6 +76,7 @@ def main(self) -> None:
                 verbose=self.verbose,
                 image_prefix="frame_train_" if self.eval_data is not None else "frame_",
                 keep_image_dir=False,
+                random_seed=self.random_seed,
             )
             if self.eval_data is not None:
                 summary_log_eval, num_extracted_frames_eval = process_data_utils.convert_video_to_images(
@@ -82,6 +88,7 @@ def main(self) -> None:
                 verbose=self.verbose,
                 image_prefix="frame_eval_",
                 keep_image_dir=True,
+                random_seed=self.eval_random_seed,
             )
             summary_log += summary_log_eval
             num_extracted_frames += num_extracted_frames_eval