From fdb1baa05c8da5b4ed3e7a62200f406dcb26ba79 Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Mon, 20 May 2024 19:48:21 -0700 Subject: [PATCH] [docs] VideoProcessor (#7965) * fix? * fix? * fix --- docs/source/en/api/video_processor.md | 8 +++++++- src/diffusers/video_processor.py | 20 +++++++++++--------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/docs/source/en/api/video_processor.md b/docs/source/en/api/video_processor.md index ef244210bd23..6461c46c286f 100644 --- a/docs/source/en/api/video_processor.md +++ b/docs/source/en/api/video_processor.md @@ -12,4 +12,10 @@ specific language governing permissions and limitations under the License. # Video Processor -The `VideoProcessor` provides a unified API for video pipelines to prepare inputs for VAE encoding and post-processing outputs once they're decoded. The class inherits [`VaeImageProcessor`] so it includes transformations such as resizing, normalization, and conversion between PIL Image, PyTorch, and NumPy arrays. \ No newline at end of file +The [`VideoProcessor`] provides a unified API for video pipelines to prepare inputs for VAE encoding and post-processing outputs once they're decoded. The class inherits [`VaeImageProcessor`] so it includes transformations such as resizing, normalization, and conversion between PIL Image, PyTorch, and NumPy arrays. + +## VideoProcessor + +[[autodoc]] video_processor.VideoProcessor.preprocess_video + +[[autodoc]] video_processor.VideoProcessor.postprocess_video diff --git a/src/diffusers/video_processor.py b/src/diffusers/video_processor.py index 5fcba7836aea..9e2727b85377 100644 --- a/src/diffusers/video_processor.py +++ b/src/diffusers/video_processor.py @@ -30,17 +30,19 @@ def preprocess_video(self, video, height: Optional[int] = None, width: Optional[ Preprocesses input video(s). Args: - video: The input video. 
It can be one of the following: + video (`List[PIL.Image]`, `List[List[PIL.Image]]`, `torch.Tensor`, `np.array`, `List[torch.Tensor]`, `List[np.array]`): + The input video. It can be one of the following: * List of the PIL images. * List of list of PIL images. - * 4D Torch tensors (expected shape for each tensor: (num_frames, num_channels, height, width)). - * 4D NumPy arrays (expected shape for each array: (num_frames, height, width, num_channels)). - * List of 4D Torch tensors (expected shape for each tensor: (num_frames, num_channels, height, width)). - * List of 4D NumPy arrays (expected shape for each array: (num_frames, height, width, num_channels)). - * 5D NumPy arrays: expected shape for each array: (batch_size, num_frames, height, width, - num_channels). - * 5D Torch tensors: expected shape for each array: (batch_size, num_frames, num_channels, height, - width). + * 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, width)`). + * 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`). + * List of 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, + width)`). + * List of 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`). + * 5D NumPy arrays: expected shape for each array `(batch_size, num_frames, height, width, + num_channels)`. + * 5D Torch tensors: expected shape for each tensor `(batch_size, num_frames, num_channels, height, + width)`. height (`int`, *optional*, defaults to `None`): The height in preprocessed frames of the video. If `None`, will use the `get_default_height_width()` to get default height.