[CI] Nightly Test Updates #9380

Merged
merged 10 commits on Sep 12, 2024
45 changes: 22 additions & 23 deletions tests/lora/test_lora_layers_sd.py
@@ -20,7 +20,6 @@
import torch
import torch.nn as nn
from huggingface_hub import hf_hub_download
-from huggingface_hub.repocard import RepoCard
from safetensors.torch import load_file
from transformers import CLIPTextModel, CLIPTokenizer

@@ -103,7 +102,7 @@ def tearDown(self):
@slow
@require_torch_gpu
def test_integration_move_lora_cpu(self):
path = "runwayml/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"

pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
@@ -162,7 +161,7 @@ def test_integration_move_lora_cpu(self):
def test_integration_move_lora_dora_cpu(self):
from peft import LoraConfig

path = "Lykon/dreamshaper-8"
path = "Jiali/stable-diffusion-1.5"
unet_lora_config = LoraConfig(
init_lora_weights="gaussian",
target_modules=["to_k", "to_q", "to_v", "to_out.0"],
@@ -222,7 +221,7 @@ def tearDown(self):
torch.cuda.empty_cache()

def test_integration_logits_with_scale(self):
path = "runwayml/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"

pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
@@ -254,7 +253,7 @@ def test_integration_logits_with_scale(self):
release_memory(pipe)

def test_integration_logits_no_scale(self):
path = "runwayml/stable-diffusion-v1-5"
path = "Jiali/stable-diffusion-1.5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"

pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float32)
@@ -284,8 +283,8 @@ def test_dreambooth_old_format(self):
generator = torch.Generator("cpu").manual_seed(0)

lora_model_id = "hf-internal-testing/lora_dreambooth_dog_example"
-card = RepoCard.load(lora_model_id)
-base_model_id = card.data.to_dict()["base_model"]
+
+base_model_id = "Jiali/stable-diffusion-1.5"

pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
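
Note: the dreambooth tests previously resolved the base checkpoint from the LoRA repo's model card and now pin it directly. A minimal before/after sketch, assuming the LoRA repo's card declares a `base_model` field (the calls shown are exactly the ones removed above):

```python
from huggingface_hub.repocard import RepoCard

# Before: the base model was looked up from the LoRA repo's model card at
# test time, so the test depended on the card's target staying available
# on the Hub.
card = RepoCard.load("hf-internal-testing/lora_dreambooth_dog_example")
base_model_id = card.data.to_dict()["base_model"]

# After: the test pins a mirror of SD 1.5 and no longer depends on the card.
base_model_id = "Jiali/stable-diffusion-1.5"
```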
@@ -308,8 +307,8 @@ def test_dreambooth_text_encoder_new_format(self):
generator = torch.Generator().manual_seed(0)

lora_model_id = "hf-internal-testing/lora-trained"
-card = RepoCard.load(lora_model_id)
-base_model_id = card.data.to_dict()["base_model"]
+
+base_model_id = "Jiali/stable-diffusion-1.5"

pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
@@ -420,7 +419,7 @@ def test_a1111_with_sequential_cpu_offload(self):
def test_kohya_sd_v15_with_higher_dimensions(self):
generator = torch.Generator().manual_seed(0)

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
torch_device
)
lora_model_id = "hf-internal-testing/urushisato-lora"
@@ -444,8 +443,8 @@ def test_vanilla_funetuning(self):
generator = torch.Generator().manual_seed(0)

lora_model_id = "hf-internal-testing/sd-model-finetuned-lora-t4"
-card = RepoCard.load(lora_model_id)
-base_model_id = card.data.to_dict()["base_model"]
+
+base_model_id = "Jiali/stable-diffusion-1.5"

pipe = StableDiffusionPipeline.from_pretrained(base_model_id, safety_checker=None)
pipe = pipe.to(torch_device)
@@ -468,7 +467,7 @@ def test_unload_kohya_lora(self):
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
@@ -506,7 +505,7 @@ def test_load_unload_load_kohya_lora(self):
prompt = "masterpiece, best quality, mountain"
num_inference_steps = 2

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", safety_checker=None).to(
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", safety_checker=None).to(
torch_device
)
initial_images = pipe(
@@ -548,9 +547,9 @@ def test_load_unload_load_kohya_lora(self):

def test_not_empty_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
-pipe = AutoPipelineForText2Image.from_pretrained(
-    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
-).to(torch_device)
+pipe = AutoPipelineForText2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
+    torch_device
+)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
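
Note: `test_not_empty_state_dict` keeps its guard against issue 7054 while swapping the base model. A minimal sketch of the guarded behavior, under the assumption (suggested by the test name and issue reference) that the original bug left a user-supplied state dict empty after loading; the device placement here is illustrative:

```python
import torch
from diffusers import AutoPipelineForText2Image, LCMScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

pipe = AutoPipelineForText2Image.from_pretrained(
    "Jiali/stable-diffusion-1.5", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
lcm_lora = load_file(cached_file)  # dict mapping LoRA keys to tensors

pipe.load_lora_weights(lcm_lora)   # should copy entries rather than pop them
assert lcm_lora != {}              # the caller's dict must remain populated
```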
@@ -562,9 +561,9 @@ def test_not_empty_state_dict(self):

def test_load_unload_load_state_dict(self):
# Makes sure https://github.com/huggingface/diffusers/issues/7054 does not happen again
-pipe = AutoPipelineForText2Image.from_pretrained(
-    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
-).to(torch_device)
+pipe = AutoPipelineForText2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16).to(
+    torch_device
+)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

cached_file = hf_hub_download("hf-internal-testing/lcm-lora-test-sd-v1-5", "test_lora.safetensors")
@@ -581,7 +580,7 @@ def test_load_unload_load_state_dict(self):
release_memory(pipe)

def test_sdv1_5_lcm_lora(self):
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe = DiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

@@ -609,7 +608,7 @@ def test_sdv1_5_lcm_lora(self):
release_memory(pipe)

def test_sdv1_5_lcm_lora_img2img(self):
pipe = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe = AutoPipelineForImage2Image.from_pretrained("Jiali/stable-diffusion-1.5", torch_dtype=torch.float16)
pipe.to(torch_device)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

@@ -650,7 +649,7 @@ def test_sd_load_civitai_empty_network_alpha(self):
This test simply checks that loading a LoRA with an empty network alpha works fine
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipeline = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5")
pipeline.enable_sequential_cpu_offload()
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
6 changes: 3 additions & 3 deletions tests/models/autoencoders/test_models_vae.py
@@ -1051,7 +1051,7 @@ def test_encode_decode(self):

def test_sd(self):
vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder") # TODO - update
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", vae=vae, safety_checker=None)
pipe = StableDiffusionPipeline.from_pretrained("Jiali/stable-diffusion-1.5", vae=vae, safety_checker=None)
pipe.to(torch_device)

out = pipe(
@@ -1099,7 +1099,7 @@ def test_sd_f16(self):
"openai/consistency-decoder", torch_dtype=torch.float16
) # TODO - update
pipe = StableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
"Jiali/stable-diffusion-1.5",
torch_dtype=torch.float16,
vae=vae,
safety_checker=None,
@@ -1124,7 +1124,7 @@ def test_sd_f16(self):
def test_vae_tiling(self):
vae = ConsistencyDecoderVAE.from_pretrained("openai/consistency-decoder", torch_dtype=torch.float16)
pipe = StableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", vae=vae, safety_checker=None, torch_dtype=torch.float16
"Jiali/stable-diffusion-1.5", vae=vae, safety_checker=None, torch_dtype=torch.float16
)
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
8 changes: 4 additions & 4 deletions tests/models/unets/test_models_unet_2d_condition.py
@@ -1376,7 +1376,7 @@ def test_compvis_sd_v1_4_fp16(self, seed, timestep, expected_slice):
@require_torch_accelerator
@skip_mps
def test_compvis_sd_v1_5(self, seed, timestep, expected_slice):
model = self.get_unet_model(model_id="runwayml/stable-diffusion-v1-5")
model = self.get_unet_model(model_id="Jiali/stable-diffusion-1.5")
latents = self.get_latents(seed)
encoder_hidden_states = self.get_encoder_hidden_states(seed)

@@ -1404,7 +1404,7 @@ def test_compvis_sd_v1_5(self, seed, timestep, expected_slice):
)
@require_torch_accelerator_with_fp16
def test_compvis_sd_v1_5_fp16(self, seed, timestep, expected_slice):
model = self.get_unet_model(model_id="runwayml/stable-diffusion-v1-5", fp16=True)
model = self.get_unet_model(model_id="Jiali/stable-diffusion-1.5", fp16=True)
latents = self.get_latents(seed, fp16=True)
encoder_hidden_states = self.get_encoder_hidden_states(seed, fp16=True)

@@ -1433,7 +1433,7 @@ def test_compvis_sd_v1_5_fp16(self, seed, timestep, expected_slice):
@require_torch_accelerator
@skip_mps
def test_compvis_sd_inpaint(self, seed, timestep, expected_slice):
model = self.get_unet_model(model_id="runwayml/stable-diffusion-inpainting")
model = self.get_unet_model(model_id="botp/stable-diffusion-v1-5-inpainting")
latents = self.get_latents(seed, shape=(4, 9, 64, 64))
encoder_hidden_states = self.get_encoder_hidden_states(seed)

@@ -1461,7 +1461,7 @@ def test_compvis_sd_inpaint(self, seed, timestep, expected_slice):
)
@require_torch_accelerator_with_fp16
def test_compvis_sd_inpaint_fp16(self, seed, timestep, expected_slice):
model = self.get_unet_model(model_id="runwayml/stable-diffusion-inpainting", fp16=True)
model = self.get_unet_model(model_id="botp/stable-diffusion-v1-5-inpainting", fp16=True)
latents = self.get_latents(seed, shape=(4, 9, 64, 64), fp16=True)
encoder_hidden_states = self.get_encoder_hidden_states(seed, fp16=True)

41 changes: 15 additions & 26 deletions tests/pipelines/amused/test_amused.py
@@ -13,15 +13,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.


import unittest

import numpy as np
import torch
from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer

from diffusers import AmusedPipeline, AmusedScheduler, UVit2DModel, VQModel
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
+from diffusers.utils.testing_utils import (
+    enable_full_determinism,
+    require_torch_gpu,
+    slow,
+    torch_device,
+)

from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
from ..test_pipelines_common import PipelineTesterMixin
@@ -65,19 +69,15 @@ def get_dummy_components(self):
vqvae = VQModel(
act_fn="silu",
block_out_channels=[8],
-down_block_types=[
-    "DownEncoderBlock2D",
-],
+down_block_types=["DownEncoderBlock2D"],
in_channels=3,
latent_channels=8,
layers_per_block=1,
norm_num_groups=8,
num_vq_embeddings=8,
out_channels=3,
sample_size=8,
-up_block_types=[
-    "UpDecoderBlock2D",
-],
+up_block_types=["UpDecoderBlock2D"],
mid_block_add_attention=False,
lookup_from_codebook=True,
)
@@ -96,7 +96,6 @@ def get_dummy_components(self):
)
text_encoder = CLIPTextModelWithProjection(text_encoder_config)
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

components = {
"transformer": transformer,
"scheduler": scheduler,
@@ -135,47 +134,37 @@ class AmusedPipelineSlowTests(unittest.TestCase):
def test_amused_256(self):
pipe = AmusedPipeline.from_pretrained("amused/amused-256")
pipe.to(torch_device)

image = pipe("dog", generator=torch.Generator().manual_seed(0), num_inference_steps=2, output_type="np").images

image_slice = image[0, -3:, -3:, -1].flatten()

assert image.shape == (1, 256, 256, 3)
-expected_slice = np.array([0.4011, 0.3992, 0.3790, 0.3856, 0.3772, 0.3711, 0.3919, 0.3850, 0.3625])
-assert np.abs(image_slice - expected_slice).max() < 3e-3
+expected_slice = np.array([0.4011, 0.3992, 0.379, 0.3856, 0.3772, 0.3711, 0.3919, 0.385, 0.3625])
+assert np.abs(image_slice - expected_slice).max() < 0.003

def test_amused_256_fp16(self):
pipe = AmusedPipeline.from_pretrained("amused/amused-256", variant="fp16", torch_dtype=torch.float16)
pipe.to(torch_device)

image = pipe("dog", generator=torch.Generator().manual_seed(0), num_inference_steps=2, output_type="np").images

image_slice = image[0, -3:, -3:, -1].flatten()

assert image.shape == (1, 256, 256, 3)
expected_slice = np.array([0.0554, 0.05129, 0.0344, 0.0452, 0.0476, 0.0271, 0.0495, 0.0527, 0.0158])
-assert np.abs(image_slice - expected_slice).max() < 7e-3
+assert np.abs(image_slice - expected_slice).max() < 0.007

def test_amused_512(self):
pipe = AmusedPipeline.from_pretrained("amused/amused-512")
pipe.to(torch_device)

image = pipe("dog", generator=torch.Generator().manual_seed(0), num_inference_steps=2, output_type="np").images

image_slice = image[0, -3:, -3:, -1].flatten()

assert image.shape == (1, 512, 512, 3)
-expected_slice = np.array([0.9960, 0.9960, 0.9946, 0.9980, 0.9947, 0.9932, 0.9960, 0.9961, 0.9947])
-assert np.abs(image_slice - expected_slice).max() < 3e-3
+expected_slice = np.array([0.1199, 0.1171, 0.1229, 0.1188, 0.1210, 0.1147, 0.1260, 0.1346, 0.1152])
+assert np.abs(image_slice - expected_slice).max() < 0.003

Comment on lines +159 to +160 (Member): Probably because of the runner change?
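
Note: the expected slices here are hardware-dependent reference outputs, so a runner change plausibly explains the new values. A sketch of how such values would be regenerated on the new runner (same calls as the test itself; the device is an assumption):

```python
import torch
from diffusers import AmusedPipeline

pipe = AmusedPipeline.from_pretrained("amused/amused-512").to("cuda")
image = pipe(
    "dog", generator=torch.Generator().manual_seed(0), num_inference_steps=2, output_type="np"
).images

# Print the corner slice; these nine values become the new expected_slice.
print(image[0, -3:, -3:, -1].flatten().round(4))
```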


def test_amused_512_fp16(self):
pipe = AmusedPipeline.from_pretrained("amused/amused-512", variant="fp16", torch_dtype=torch.float16)
pipe.to(torch_device)

image = pipe("dog", generator=torch.Generator().manual_seed(0), num_inference_steps=2, output_type="np").images

image_slice = image[0, -3:, -3:, -1].flatten()

assert image.shape == (1, 512, 512, 3)
-expected_slice = np.array([0.9983, 1.0, 1.0, 1.0, 1.0, 0.9989, 0.9994, 0.9976, 0.9977])
-assert np.abs(image_slice - expected_slice).max() < 3e-3
+expected_slice = np.array([0.1509, 0.1492, 0.1531, 0.1485, 0.1501, 0.1465, 0.1581, 0.1690, 0.1499])
+assert np.abs(image_slice - expected_slice).max() < 0.003

Comment (Member): Same (runner change effects)?