【Hackathon 5th No.84】Add video generation models MS-Image2Video + MS-Vid2Vid-XL #345

Merged
merged 13 commits on Jan 8, 2024
@@ -0,0 +1,75 @@
import gradio as gr
import paddle
from ppdiffusers import ImgToVideoSDPipeline, VideoToVideoModelscopePipeline
from ppdiffusers.utils import export_to_video, load_image
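
# Stage 1 (MS-Image2Video): generate a low-resolution video clip from a single input image.
# Stage 2 (MS-Vid2Vid-XL): upscale that clip to high definition (1280 * 720), guided by an English text prompt.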

image_to_video_pipe = ImgToVideoSDPipeline.from_pretrained(
    "Yang-Changhui/img-to-video-paddle", paddle_dtype=paddle.float32
)

video_to_video_pipe = VideoToVideoModelscopePipeline.from_pretrained("Yang-Changhui/video-to-video-paddle")


def upload_file(file):
    return file.name


def image_to_video(image_in):
    if image_in is None:
        raise gr.Error('请上传图片或等待图片上传完成')
    image_in = load_image(image_in)
    output_video_frames = image_to_video_pipe(image_in).frames
    output_video_path = export_to_video(output_video_frames, "img2video_test.mp4")
    print(output_video_path)
    return output_video_path


def video_to_video(video_in, text_in):
    output_video_frames = video_to_video_pipe(prompt=text_in, video_path=video_in).frames
    output_video_path = export_to_video(output_video_frames, "video2video_test.mp4")
    print(output_video_path)
    return output_video_path


with gr.Blocks() as demo:
    gr.Markdown(
        """<center><font size=7>I2VGen-XL</center>
<left><font size=3>I2VGen-XL可以根据用户输入的静态图像和文本生成目标接近、语义相同的视频,生成的视频具高清(1280 * 720)、宽屏(16:9)、时序连贯、质感好等特点。</left>

<left><font size=3>I2VGen-XL can generate videos with similar content and semantics from a user-supplied static image and text. The generated videos are high definition (1280 * 720), widescreen (16:9), temporally coherent, and well textured.</left>
"""
    )
    with gr.Blocks():
        gr.Markdown(
            """<left><font size=3>步骤1:选择合适的图片进行上传 (建议图片比例为1:1),然后点击“生成视频”,得到满意的视频后进行下一步。</left>

<left><font size=3>Step 1: Select an image to upload (an aspect ratio of 1:1 is recommended), then click “Generate Video”. Once you are satisfied with the generated video, proceed to the next step.</left>"""
        )
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(label="图片输入", type="filepath", interactive=False, elem_id="image-in", height=300)
                with gr.Row():
                    upload_image = gr.UploadButton("上传图片", file_types=["image"], file_count="single")
                    image_submit = gr.Button("生成视频🎬")
            with gr.Column():
                video_out_1 = gr.Video(label='生成的视频', elem_id='video-out_1', interactive=False, height=300)
        gr.Markdown(
            """<left><font size=3>步骤2:补充对视频内容的英文文本描述,然后点击“生成高分辨率视频”,视频生成大致需要2分钟。</left>

<left><font size=3>Step 2: Add an English text description of the desired video content, then click “Generate high-resolution video”. Generation takes roughly 2 minutes.</left>"""
        )
        with gr.Row():
            with gr.Column():
                text_in = gr.Textbox(label="文本描述", lines=2, elem_id="text-in")
                video_submit = gr.Button("生成高分辨率视频🎬")
            with gr.Column():
                paddle.device.cuda.empty_cache()  # release cached GPU memory; note this runs once, when the UI is built
                video_out_2 = gr.Video(label='生成的视频', elem_id='video-out_2', interactive=False, height=300)
        gr.Markdown("<left><font size=2>注:如果生成的视频无法播放,请尝试升级浏览器或使用chrome浏览器。</left>")

    upload_image.upload(upload_file, upload_image, image_in, queue=False)
    image_submit.click(fn=image_to_video, inputs=[image_in], outputs=[video_out_1])

    video_submit.click(fn=video_to_video, inputs=[video_out_1, text_in], outputs=[video_out_2])

demo.queue(status_update_rate=1, api_open=False).launch(share=False, show_error=True)
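
For reference, the same two-stage flow can be exercised without the Gradio UI. The sketch below is illustrative and not part of this PR: it only uses calls that appear in the diff above, and "input.png" plus the example prompt are hypothetical placeholders.

# Illustrative sketch (not part of this PR): run both pipelines headlessly.
import paddle
from ppdiffusers import ImgToVideoSDPipeline, VideoToVideoModelscopePipeline
from ppdiffusers.utils import export_to_video, load_image

# Stage 1: single image -> low-resolution video clip.
img2vid = ImgToVideoSDPipeline.from_pretrained(
    "Yang-Changhui/img-to-video-paddle", paddle_dtype=paddle.float32
)
frames = img2vid(load_image("input.png")).frames  # "input.png" is a placeholder path
stage1_path = export_to_video(frames, "stage1.mp4")

# Release cached GPU memory before loading the second pipeline.
paddle.device.cuda.empty_cache()

# Stage 2: low-resolution clip + English prompt -> high-resolution video.
vid2vid = VideoToVideoModelscopePipeline.from_pretrained("Yang-Changhui/video-to-video-paddle")
hd_frames = vid2vid(prompt="a panda eating bamboo, high quality", video_path=stage1_path).frames
export_to_video(hd_frames, "stage2.mp4")

Chaining through export_to_video mirrors the demo above, which hands the stage-1 output path to VideoToVideoModelscopePipeline via its video_path argument.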
7 changes: 7 additions & 0 deletions ppdiffusers/ppdiffusers/__init__.py
@@ -58,20 +58,25 @@
from .models import (
    AsymmetricAutoencoderKL,
    AutoencoderKL,
    AutoencoderKL_imgtovideo,
    ControlNetModel,
    GaussianDiffusion,
    GaussianDiffusion_SDEdit,
    LitEma,
    LVDMAutoencoderKL,
    LVDMUNet3DModel,
    ModelMixin,
    MultiAdapter,
    PriorTransformer,
    STUNetModel,
    T2IAdapter,
    T5FilmDecoder,
    Transformer2DModel,
    UNet1DModel,
    UNet2DConditionModel,
    UNet2DModel,
    UNet3DConditionModel,
    Vid2VidSTUNet,
    VQModel,
)
from .optimization import (
@@ -170,6 +175,7 @@
    IFInpaintingSuperResolutionPipeline,
    IFPipeline,
    IFSuperResolutionPipeline,
    ImgToVideoSDPipeline,
    KandinskyCombinedPipeline,
    KandinskyImg2ImgCombinedPipeline,
    KandinskyImg2ImgPipeline,
@@ -234,6 +240,7 @@
    VersatileDiffusionImageVariationPipeline,
    VersatileDiffusionPipeline,
    VersatileDiffusionTextToImagePipeline,
    VideoToVideoModelscopePipeline,
    VideoToVideoSDPipeline,
    VQDiffusionPipeline,
)
7 changes: 7 additions & 0 deletions ppdiffusers/ppdiffusers/models/__init__.py
@@ -23,14 +23,21 @@
if is_paddle_available():
    from .adapter import MultiAdapter, T2IAdapter
    from .autoencoder_asym_kl import AsymmetricAutoencoderKL
    from .autoencoder_kl import AutoencoderKL
    from .controlnet import ControlNetModel
    from .dual_transformer_2d import DualTransformer2DModel
    from .ema import LitEma
    from .lvdm_unet_3d import LVDMUNet3DModel
    from .lvdm_vae import LVDMAutoencoderKL
    from .modeling_utils import ModelMixin
    from .modelscope_autoencoder_img2vid import AutoencoderKL_imgtovideo
    from .modelscope_gaussian_diffusion import GaussianDiffusion
    from .modelscope_gaussion_sdedit import GaussianDiffusion_SDEdit
    from .modelscope_st_unet import STUNetModel
    from .modelscope_st_unet_video2video import Vid2VidSTUNet
    from .prior_transformer import PriorTransformer
    from .t5_film_transformer import T5FilmDecoder
    from .transformer_2d import Transformer2DModel
    from .unet_1d import UNet1DModel