huggingface · DN6 · Aug 22, 2024 · Aug 16, 2024 · Aug 16, 2024 · Aug 21, 2024
diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
@@ -2011,6 +2011,11 @@ def __call__(
         key = attn.head_to_batch_dim(key).contiguous()
         value = attn.head_to_batch_dim(value).contiguous()
 
+        if attn.norm_q is not None:
+            query = attn.norm_q(query)
+        if attn.norm_k is not None:
+            key = attn.norm_k(key)
+
         hidden_states = xformers.ops.memory_efficient_attention(
             query, key, value, attn_bias=attention_mask, op=self.attention_op, scale=attn.scale
         )

diff --git a/tests/pipelines/animatediff/test_animatediff_controlnet.py b/tests/pipelines/animatediff/test_animatediff_controlnet.py
@@ -20,6 +20,7 @@
 )
 from diffusers.models.attention import FreeNoiseTransformerBlock
 from diffusers.utils import logging
+from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import torch_device
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -329,6 +330,13 @@ def test_prompt_embeds(self):
         inputs["prompt_embeds"] = torch.randn((1, 4, pipe.text_encoder.config.hidden_size), device=torch_device)
         pipe(**inputs)
 
+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_xformers_attention_forwardGenerator_pass(self):
+        super()._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False)
+
     def test_free_init(self):
         components = self.get_dummy_components()
         pipe: AnimateDiffControlNetPipeline = self.pipeline_class(**components)

diff --git a/tests/pipelines/animatediff/test_animatediff_sparsectrl.py b/tests/pipelines/animatediff/test_animatediff_sparsectrl.py
@@ -19,6 +19,7 @@
     UNetMotionModel,
 )
 from diffusers.utils import logging
+from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import torch_device
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -393,6 +394,13 @@ def test_prompt_embeds(self):
         inputs["prompt_embeds"] = torch.randn((1, 4, pipe.text_encoder.config.hidden_size), device=torch_device)
         pipe(**inputs)
 
+    @unittest.skipIf(
+        torch_device != "cuda" or not is_xformers_available(),
+        reason="XFormers attention is only available with CUDA and `xformers` installed",
+    )
+    def test_xformers_attention_forwardGenerator_pass(self):
+        super()._test_xformers_attention_forwardGenerator_pass(test_mean_pixel_difference=False)
+
     def test_free_init(self):
         components = self.get_dummy_components()
         pipe: AnimateDiffSparseControlNetPipeline = self.pipeline_class(**components)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
@@ -1687,7 +1687,15 @@ def _test_xformers_attention_forwardGenerator_pass(
             self.assertLess(max_diff, expected_max_diff, "XFormers attention should not affect the inference results")
 
         if test_mean_pixel_difference:
-            assert_mean_pixel_difference(output_with_offload[0], output_without_offload[0])
+            if torch.is_tensor(output_without_offload):
+                if output_without_offload.ndim == 5:
+                    # Educated guess that the original format here is [B, F, C, H, W] and we
+                    # permute to [B, F, H, W, C] to make input compatible with mean pixel difference
+                    output_without_offload = output_without_offload.permute(0, 1, 3, 4, 2)[0]
+                    output_with_offload = output_with_offload.permute(0, 1, 3, 4, 2)[0]
+                output_without_offload = to_np(output_without_offload)
+                output_with_offload = to_np(output_with_offload)
+            assert_mean_pixel_difference(to_np(output_with_offload[0]), to_np(output_without_offload[0]))
 
     def test_progress_bar(self):
         components = self.get_dummy_components()