Commit 2dc7848

Merge pull request #4 from rupeshs/add-wuerstchen-support
Add wuerstchen support
2 parents 68d3e59 + bd4062b commit 2dc7848

23 files changed: +292 −39 lines changed

Readme.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -12,6 +12,7 @@ We can run StableDiffusion XL 1.0 on Google Colab
 [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eEZ_O-Fw87hoEsfSxUnGZhdqvMFEO5iV?usp=sharing)

 ## Features
+- Supports Würstchen
 - Supports Stable diffusion XL
 - Supports various Stable Diffusion workflows
 - Text to Image
```

configs/stable_diffusion_models.txt

Lines changed: 2 additions & 1 deletion

```diff
@@ -17,4 +17,5 @@ lllyasviel/sd-controlnet-openpose
 lllyasviel/sd-controlnet-depth
 lllyasviel/sd-controlnet-scribble
 lllyasviel/sd-controlnet-seg
-stabilityai/stable-diffusion-xl-base-1.0
+stabilityai/stable-diffusion-xl-base-1.0
+warp-ai/wuerstchen
```

environment.yml

Lines changed: 7 additions & 7 deletions

```diff
@@ -6,23 +6,23 @@ channels:
   - defaults
 dependencies:
   - python=3.8.5
-  - pip=20.3
+  - pip=23.2.1
   - pytorch-cuda=11.7
-  - pytorch=2.0.0
-  - torchvision=0.15.0
+  - pytorch=2.0.1
+  - torchvision=0.15.2
   - numpy=1.19.2
   - pip:
-    - accelerate==0.21.0
-    - diffusers==0.19.3
+    - accelerate==0.23.0
+    - diffusers==0.21.1
     - gradio==3.39.0
     - safetensors==0.3.1
     - scipy==1.10.0
-    - transformers==4.31.0
+    - transformers==4.33.2
     - pydantic==1.10.4
     - mypy==1.0.0
     - black==23.1.0
     - flake8==6.0.0
-    - markupsafe==2.0.1
+    - markupsafe==2.1.3
     - opencv-contrib-python==4.7.0.72
     - controlnet-aux==0.0.1
     - invisible-watermark==0.2.0
```
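
The dependency bumps are what unlock the new model: the Würstchen pipelines ship with diffusers 0.21.x, so the previous 0.19.3 pin could not load them. A quick sanity check of an installed environment (a minimal sketch; the import paths match the ones used in src/backend/wuerstchen/wuerstchen.py below):

```python
import diffusers

print(diffusers.__version__)  # expected: 0.21.1 with this environment.yml

# These imports resolve only on diffusers releases that include Würstchen;
# on the old 0.19.3 pin the second one raises ImportError.
from diffusers import AutoPipelineForText2Image
from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS
```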

src/app.py

Lines changed: 26 additions & 4 deletions

```diff
@@ -5,8 +5,13 @@
 from frontend.web.ui import diffusionmagic_web_ui
 from settings import AppSettings

-# mypy --ignore-missing-imports --explicit-package-bases .
-# flake8 --max-line-length=100 .
+
+def _get_model(model_id: str) -> str:
+    if model_id == "":
+        model_id = AppSettings().get_settings().model_settings.model_id
+    return model_id
+
+
 if __name__ == "__main__":
     try:
         app_settings = AppSettings()
@@ -19,10 +24,27 @@
         parser.add_argument(
             "-s", "--share", help="Shareable link", action="store_true", default=False
         )
+        parser.add_argument(
+            "-m",
+            "--model",
+            help="Model identifier,E.g. runwayml/stable-diffusion-v1-5",
+            default="",
+        )
         args = parser.parse_args()
         compute = Computing()
-        generate = Generate(compute)
-        dm_web_ui = diffusionmagic_web_ui(generate)
+        model_id = _get_model(args.model)
+
+        print(f"Model : {model_id}")
+
+        generate = Generate(
+            compute,
+            model_id,
+        )
+
+        dm_web_ui = diffusionmagic_web_ui(
+            generate,
+            model_id,
+        )
         if args.share:
             dm_web_ui.queue().launch(share=True)
         else:
```
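
With this change the model can be picked at launch time instead of only through the settings file: for example `python src/app.py --model warp-ai/wuerstchen` (entry-point path assumed from the repo layout shown here). When `-m/--model` is omitted, `_get_model` falls back to the `model_id` stored in the app settings.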

src/backend/controlnet/controls/normal_control.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -18,10 +18,10 @@ def get_control_image(self, image: Image) -> Image:

         bg_threhold = 0.4

-        x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
+        x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)  # type: ignore
         x[image_depth < bg_threhold] = 0

-        y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
+        y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)  # type: ignore
         y[image_depth < bg_threhold] = 0

         z = np.ones_like(x) * np.pi * 2.0
```

src/backend/generate.py

Lines changed: 45 additions & 2 deletions

```diff
@@ -13,14 +13,20 @@
     StableDiffusionImageInstructPixToPixSetting,
     StableDiffusionControlnetSetting,
 )
+from backend.wuerstchen.models.setting import WurstchenSetting
 from backend.controlnet.ControlContext import ControlnetContext
 from backend.stablediffusion.stablediffusion import StableDiffusion
 from backend.stablediffusion.stablediffusionxl import StableDiffusionXl
+from backend.wuerstchen.wuerstchen import Wuerstchen
 from settings import AppSettings


 class Generate:
-    def __init__(self, compute: Computing):
+    def __init__(
+        self,
+        compute: Computing,
+        model_id: str,
+    ):
         self.pipe_initialized = False
         self.inpaint_pipe_initialized = False
         self.depth_pipe_initialized = False
@@ -33,8 +39,9 @@ def __init__(self, compute: Computing):
         self.controlnet = ControlnetContext(compute)
         self.stable_diffusion_xl = StableDiffusionXl(compute)
         self.app_settings = AppSettings().get_settings()
-        self.model_id = self.app_settings.model_settings.model_id
+        self.model_id = model_id
         self.low_vram_mode = self.app_settings.low_memory_mode
+        self.wuerstchen = Wuerstchen(compute)

     def diffusion_text_to_image(
         self,
@@ -89,6 +96,15 @@ def _init_stable_diffusion_xl(self):
             )
             self.pipe_initialized = True

+    def _init_wuerstchen(self):
+        if not self.pipe_initialized:
+            print("Initializing wuerstchen pipeline")
+            self.wuerstchen.get_text_to_image_wuerstchen_pipleline(
+                self.model_id,
+                self.low_vram_mode,
+            )
+            self.pipe_initialized = True
+
     def diffusion_image_to_image(
         self,
         image,
@@ -479,3 +495,30 @@ def diffusion_image_variations_xl(
             "ImageVariations",
         )
         return images
+
+    def diffusion_text_to_image_wuerstchen(
+        self,
+        prompt,
+        neg_prompt,
+        image_height,
+        image_width,
+        guidance_scale,
+        num_images,
+        seed,
+    ) -> Any:
+        wurstchen_settings = WurstchenSetting(
+            prompt=prompt,
+            negative_prompt=neg_prompt,
+            image_height=image_height,
+            image_width=image_width,
+            prior_guidance_scale=guidance_scale,
+            number_of_images=num_images,
+            seed=seed,
+        )
+        self._init_wuerstchen()
+        images = self.wuerstchen.text_to_image_wuerstchen(wurstchen_settings)
+        self._save_images(
+            images,
+            "TextToImage",
+        )
+        return images
```
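
Taken together with the src/app.py changes, the new method is reachable as below. A minimal sketch, assuming `Computing()` takes no arguments as in src/app.py; all generation values are example inputs:

```python
from backend.computing import Computing
from backend.generate import Generate

generate = Generate(Computing(), "warp-ai/wuerstchen")

# Signature as added in this diff; _init_wuerstchen() loads the pipeline
# lazily on the first call, so construction stays cheap.
images = generate.diffusion_text_to_image_wuerstchen(
    prompt="an astronaut riding a horse, detailed illustration",
    neg_prompt="blurry, low quality",
    image_height=1024,
    image_width=1024,
    guidance_scale=4.0,
    num_images=1,
    seed=42,
)
images[0].save("out.png")
```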

src/backend/stablediffusion/stable_diffusion_types.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -17,12 +17,14 @@ class StableDiffusionType(str, Enum):
     controlnet_scribble = "controlnet_scribble"
     controlnet_seg = "controlnet_seg"
     stable_diffusion_xl = "StableDiffusionXl"
+    wuerstchen = "Wuerstchen"


 def get_diffusion_type(
     model_id: str,
 ) -> StableDiffusionType:
     stable_diffusion_type = StableDiffusionType.base
+    model_id = model_id.lower()
     if "inpainting" in model_id:
         stable_diffusion_type = StableDiffusionType.inpainting
     elif "instruct-pix2pix" in model_id:
@@ -47,4 +49,6 @@ def get_diffusion_type(
         stable_diffusion_type = StableDiffusionType.controlnet_seg
     elif "stable-diffusion-xl" in model_id:
         stable_diffusion_type = StableDiffusionType.stable_diffusion_xl
+    elif "wuerstchen" in model_id:
+        stable_diffusion_type = StableDiffusionType.wuerstchen
     return stable_diffusion_type
```
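
Note that the added `model_id.lower()` also makes the routing case-insensitive. A quick illustration of the dispatch, grounded in the branches above:

```python
from backend.stablediffusion.stable_diffusion_types import get_diffusion_type

print(get_diffusion_type("warp-ai/wuerstchen"))  # StableDiffusionType.wuerstchen
print(get_diffusion_type("Warp-AI/Wuerstchen"))  # same result, thanks to .lower()
print(get_diffusion_type("stabilityai/stable-diffusion-xl-base-1.0"))
# StableDiffusionType.stable_diffusion_xl
```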

src/backend/stablediffusion/stablediffusionxl.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -1,6 +1,6 @@
 from time import time

-import torch
+from torch import Generator
 from diffusers import (
     DiffusionPipeline,
     StableDiffusionXLImg2ImgPipeline,
@@ -72,7 +72,7 @@ def text_to_image_xl(self, setting: StableDiffusionSetting):
         generator = None
         if setting.seed != -1:
             print(f"Using seed {setting.seed}")
-            generator = torch.Generator(self.device).manual_seed(setting.seed)
+            generator = Generator(self.device).manual_seed(setting.seed)

         # if setting.attention_slicing:
         #     self.pipeline.enable_attention_slicing()
@@ -149,7 +149,7 @@ def image_to_image(self, setting: StableDiffusionImageToImageSetting):
         generator = None
         if setting.seed != -1 and setting.seed:
             print(f"Using seed {setting.seed}")
-            generator = torch.Generator(self.device).manual_seed(setting.seed)
+            generator = Generator(self.device).manual_seed(setting.seed)

         if setting.attention_slicing:
             self.img_to_img_pipeline.enable_attention_slicing()  # type: ignore
```
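
This is a pure import refactor; the underlying pattern, a device-bound seeded RNG for reproducible sampling, is unchanged. In isolation (device string and seed are example values):

```python
from torch import Generator

# The same seed on the same device reproduces the same latents/image.
generator = Generator("cuda").manual_seed(42)
# ...then passed into the pipeline call as generator=generator
```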
src/backend/wuerstchen/models/setting.py

Lines changed: 12 additions & 0 deletions

```diff
@@ -0,0 +1,12 @@
+from pydantic import BaseModel
+from typing import Optional
+
+
+class WurstchenSetting(BaseModel):
+    prompt: str
+    negative_prompt: Optional[str]
+    image_height: Optional[int] = 512
+    image_width: Optional[int] = 512
+    prior_guidance_scale: Optional[float] = 4.0
+    number_of_images: Optional[int] = 1
+    seed: Optional[int] = -1
```
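
Since environment.yml pins pydantic 1.10.4, the `Optional` field without an explicit `= ...` defaults to None under pydantic v1 rules, and the rest carry the defaults shown. For example:

```python
from backend.wuerstchen.models.setting import WurstchenSetting

s = WurstchenSetting(prompt="a red sports car")  # only prompt is required
print(s.image_height, s.image_width)   # 512 512
print(s.prior_guidance_scale, s.seed)  # 4.0 -1
print(s.negative_prompt)               # None (implicit Optional default)
```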

src/backend/wuerstchen/wuerstchen.py

Lines changed: 84 additions & 0 deletions

```diff
@@ -0,0 +1,84 @@
+from time import time
+
+from backend.computing import Computing
+from backend.wuerstchen.models.setting import WurstchenSetting
+from torch import Generator
+from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS
+from diffusers import AutoPipelineForText2Image
+
+
+class Wuerstchen:
+    def __init__(self, compute: Computing):
+        self.compute = compute
+        self.pipeline = None
+        self.device = self.compute.name
+        super().__init__()
+
+    def get_text_to_image_wuerstchen_pipleline(
+        self,
+        model_id: str = "warp-ai/wuerstchen",
+        low_vram_mode: bool = False,
+    ):
+        self.model_id = model_id
+
+        self.low_vram_mode = low_vram_mode
+        print(f"Wuerstchen - {self.compute.name},{self.compute.datatype}")
+        print(f"using model {model_id}")
+        tic = time()
+        self._load_model()
+        self._pipeline_to_device()
+        delta = time() - tic
+        print(f"Model loaded in {delta:.2f}s ")
+
+    def text_to_image_wuerstchen(self, setting: WurstchenSetting):
+        if self.pipeline is None:
+            raise Exception("Text to image pipeline not initialized")
+
+        generator = None
+        if setting.seed != -1:
+            print(f"Using seed {setting.seed}")
+            generator = Generator(self.device).manual_seed(setting.seed)
+
+        images = self.pipeline(
+            setting.prompt,
+            negative_prompt=setting.negative_prompt,
+            height=setting.image_height,
+            width=setting.image_width,
+            prior_timesteps=DEFAULT_STAGE_C_TIMESTEPS,
+            prior_guidance_scale=setting.prior_guidance_scale,
+            num_images_per_prompt=setting.number_of_images,
+            generator=generator,
+        ).images
+
+        return images
+
+    def _pipeline_to_device(self):
+        if self.low_vram_mode:
+            print("Running in low VRAM mode,slower to generate images")
+            self.pipeline.enable_sequential_cpu_offload()
+        else:
+            if self.compute.name == "cuda":
+                self.pipeline = self.pipeline.to("cuda")
+            elif self.compute.name == "mps":
+                self.pipeline = self.pipeline.to("mps")
+
+    def _load_full_precision_model(self):
+        self.pipeline = AutoPipelineForText2Image.from_pretrained(
+            self.model_id,
+            torch_dtype=self.compute.datatype,
+        )
+
+    def _load_model(self):
+        if self.compute.name == "cuda":
+            try:
+                self.pipeline = AutoPipelineForText2Image.from_pretrained(
+                    self.model_id,
+                    torch_dtype=self.compute.datatype,
+                )
+            except Exception as ex:
+                print(
+                    f" The fp16 of the model not found using full precision model, {ex}"
+                )
+                self._load_full_precision_model()
+        else:
+            self._load_full_precision_model()
```
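
For context, this class wraps the combined Würstchen pipeline that diffusers exposes: a stage C prior plus the stage A/B decoder behind one call. It can be exercised standalone, independent of this repo; a minimal sketch assuming a CUDA machine with enough VRAM (prompt and output file name are examples):

```python
import torch
from diffusers import AutoPipelineForText2Image
from diffusers.pipelines.wuerstchen import DEFAULT_STAGE_C_TIMESTEPS

# Loads prior and decoder together as one text-to-image pipeline.
pipe = AutoPipelineForText2Image.from_pretrained(
    "warp-ai/wuerstchen", torch_dtype=torch.float16
).to("cuda")

image = pipe(
    "anthropomorphic cat dressed as a firefighter",
    height=1024,
    width=1024,
    prior_timesteps=DEFAULT_STAGE_C_TIMESTEPS,
    prior_guidance_scale=4.0,
).images[0]
image.save("cat_firefighter.png")
```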
