Skip to content

Commit fc24715

Browse files
authored
Implement EasyCache and Invent LazyCache (Comfy-Org#9496)
* Attempting a universal implementation of EasyCache, starting with flux as test; I screwed up the math a bit, but when I set it just right it works. * Fixed math to make threshold work as expected, refactored code to use EasyCacheHolder instead of a dict wrapped by object * Use sigmas from transformer_options instead of timesteps to be compatible with a greater amount of models, make end_percent work * Make log statement when not skipping useful, preparing for per-cond caching * Added DIFFUSION_MODEL wrapper around forward function for wan model * Add subsampling for heuristic inputs * Add subsampling to output_prev (output_prev_subsampled now) * Properly consider conds in EasyCache logic * Created SuperEasyCache to test what happens if caching and reuse is moved outside the scope of conds, added PREDICT_NOISE wrapper to facilitate this test * Change max reuse_threshold to 3.0 * Mark EasyCache/SuperEasyCache as experimental (beta) * Make Lumina2 compatible with EasyCache * Add EasyCache support for Qwen Image * Fix missing comma, curse you Cursor * Add EasyCache support to AceStep * Add EasyCache support to Chroma * Added EasyCache support to Cosmos Predict t2i * Make EasyCache not crash with Cosmos Predict ImageToVideo latents, but does not work well at all * Add EasyCache support to hidream * Added EasyCache support to hunyuan video * Added EasyCache support to hunyuan3d * Added EasyCache support to LTXV (not very good, but does not crash) * Implemented EasyCache for aura_flow * Renamed SuperEasyCache to LazyCache, hardcoded subsample_factor to 8 on nodes * Extra logging when verbose is true for EasyCache
1 parent fe31ad0 commit fc24715

File tree

17 files changed

+639
-7
lines changed

17 files changed

+639
-7
lines changed

comfy/ldm/ace/model.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from torch import nn
2020

2121
import comfy.model_management
22+
import comfy.patcher_extension
2223

2324
from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps
2425
from .attention import LinearTransformerBlock, t2i_modulate
@@ -343,7 +344,28 @@ def decode(
343344
output = self.final_layer(hidden_states, embedded_timestep, output_length)
344345
return output
345346

346-
def forward(
347+
def forward(self,
348+
x,
349+
timestep,
350+
attention_mask=None,
351+
context: Optional[torch.Tensor] = None,
352+
text_attention_mask: Optional[torch.LongTensor] = None,
353+
speaker_embeds: Optional[torch.FloatTensor] = None,
354+
lyric_token_idx: Optional[torch.LongTensor] = None,
355+
lyric_mask: Optional[torch.LongTensor] = None,
356+
block_controlnet_hidden_states: Optional[Union[List[torch.Tensor], torch.Tensor]] = None,
357+
controlnet_scale: Union[float, torch.Tensor] = 1.0,
358+
lyrics_strength=1.0,
359+
**kwargs
360+
):
361+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
362+
self._forward,
363+
self,
364+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {}))
365+
).execute(x, timestep, attention_mask, context, text_attention_mask, speaker_embeds, lyric_token_idx, lyric_mask, block_controlnet_hidden_states,
366+
controlnet_scale, lyrics_strength, **kwargs)
367+
368+
def _forward(
347369
self,
348370
x,
349371
timestep,

comfy/ldm/aura/mmdit.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from comfy.ldm.modules.attention import optimized_attention
1111
import comfy.ops
12+
import comfy.patcher_extension
1213
import comfy.ldm.common_dit
1314

1415
def modulate(x, shift, scale):
@@ -436,6 +437,13 @@ def apply_pos_embeds(self, x, h, w):
436437
return x + pos_encoding.reshape(1, -1, self.positional_encoding.shape[-1])
437438

438439
def forward(self, x, timestep, context, transformer_options={}, **kwargs):
440+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
441+
self._forward,
442+
self,
443+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
444+
).execute(x, timestep, context, transformer_options, **kwargs)
445+
446+
def _forward(self, x, timestep, context, transformer_options={}, **kwargs):
439447
patches_replace = transformer_options.get("patches_replace", {})
440448
# patchify x, add PE
441449
b, c, h, w = x.shape

comfy/ldm/chroma/model.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import torch
66
from torch import Tensor, nn
77
from einops import rearrange, repeat
8+
import comfy.patcher_extension
89
import comfy.ldm.common_dit
910

1011
from comfy.ldm.flux.layers import (
@@ -253,6 +254,13 @@ def block_wrap(args):
253254
return img
254255

255256
def forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs):
257+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
258+
self._forward,
259+
self,
260+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
261+
).execute(x, timestep, context, guidance, control, transformer_options, **kwargs)
262+
263+
def _forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs):
256264
bs, c, h, w = x.shape
257265
x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
258266

comfy/ldm/cosmos/model.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
from enum import Enum
2828
import logging
2929

30+
import comfy.patcher_extension
31+
3032
from .blocks import (
3133
FinalLayer,
3234
GeneralDITTransformerBlock,
@@ -435,6 +437,42 @@ def forward(
435437
latent_condition_sigma: Optional[torch.Tensor] = None,
436438
condition_video_augment_sigma: Optional[torch.Tensor] = None,
437439
**kwargs,
440+
):
441+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
442+
self._forward,
443+
self,
444+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {}))
445+
).execute(x,
446+
timesteps,
447+
context,
448+
attention_mask,
449+
fps,
450+
image_size,
451+
padding_mask,
452+
scalar_feature,
453+
data_type,
454+
latent_condition,
455+
latent_condition_sigma,
456+
condition_video_augment_sigma,
457+
**kwargs)
458+
459+
def _forward(
460+
self,
461+
x: torch.Tensor,
462+
timesteps: torch.Tensor,
463+
context: torch.Tensor,
464+
attention_mask: Optional[torch.Tensor] = None,
465+
# crossattn_emb: torch.Tensor,
466+
# crossattn_mask: Optional[torch.Tensor] = None,
467+
fps: Optional[torch.Tensor] = None,
468+
image_size: Optional[torch.Tensor] = None,
469+
padding_mask: Optional[torch.Tensor] = None,
470+
scalar_feature: Optional[torch.Tensor] = None,
471+
data_type: Optional[DataType] = DataType.VIDEO,
472+
latent_condition: Optional[torch.Tensor] = None,
473+
latent_condition_sigma: Optional[torch.Tensor] = None,
474+
condition_video_augment_sigma: Optional[torch.Tensor] = None,
475+
**kwargs,
438476
):
439477
"""
440478
Args:

comfy/ldm/cosmos/predict2.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from .position_embedding import VideoRopePosition3DEmb, LearnablePosEmbAxis
1212
from torchvision import transforms
1313

14+
import comfy.patcher_extension
1415
from comfy.ldm.modules.attention import optimized_attention
1516

1617
def apply_rotary_pos_emb(
@@ -805,7 +806,21 @@ def unpatchify(self, x_B_T_H_W_M: torch.Tensor) -> torch.Tensor:
805806
)
806807
return x_B_C_Tt_Hp_Wp
807808

808-
def forward(
809+
def forward(self,
810+
x: torch.Tensor,
811+
timesteps: torch.Tensor,
812+
context: torch.Tensor,
813+
fps: Optional[torch.Tensor] = None,
814+
padding_mask: Optional[torch.Tensor] = None,
815+
**kwargs,
816+
):
817+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
818+
self._forward,
819+
self,
820+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {}))
821+
).execute(x, timesteps, context, fps, padding_mask, **kwargs)
822+
823+
def _forward(
809824
self,
810825
x: torch.Tensor,
811826
timesteps: torch.Tensor,

comfy/ldm/flux/model.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from torch import Tensor, nn
77
from einops import rearrange, repeat
88
import comfy.ldm.common_dit
9+
import comfy.patcher_extension
910

1011
from .layers import (
1112
DoubleStreamBlock,
@@ -214,6 +215,13 @@ def process_img(self, x, index=0, h_offset=0, w_offset=0):
214215
return img, repeat(img_ids, "h w c -> b (h w) c", b=bs)
215216

216217
def forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs):
218+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
219+
self._forward,
220+
self,
221+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
222+
).execute(x, timestep, context, y, guidance, ref_latents, control, transformer_options, **kwargs)
223+
224+
def _forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs):
217225
bs, c, h_orig, w_orig = x.shape
218226
patch_size = self.patch_size
219227

comfy/ldm/hidream/model.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from comfy.ldm.modules.attention import optimized_attention
1515
import comfy.model_management
16+
import comfy.patcher_extension
1617
import comfy.ldm.common_dit
1718

1819

@@ -692,7 +693,23 @@ def patchify(self, x, max_seq, img_sizes=None):
692693
raise NotImplementedError
693694
return x, x_masks, img_sizes
694695

695-
def forward(
696+
def forward(self,
697+
x: torch.Tensor,
698+
t: torch.Tensor,
699+
y: Optional[torch.Tensor] = None,
700+
context: Optional[torch.Tensor] = None,
701+
encoder_hidden_states_llama3=None,
702+
image_cond=None,
703+
control = None,
704+
transformer_options = {},
705+
):
706+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
707+
self._forward,
708+
self,
709+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
710+
).execute(x, t, y, context, encoder_hidden_states_llama3, image_cond, control, transformer_options)
711+
712+
def _forward(
696713
self,
697714
x: torch.Tensor,
698715
t: torch.Tensor,

comfy/ldm/hunyuan3d/model.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
SingleStreamBlock,
88
timestep_embedding,
99
)
10+
import comfy.patcher_extension
1011

1112

1213
class Hunyuan3Dv2(nn.Module):
@@ -67,6 +68,13 @@ def __init__(
6768
self.final_layer = LastLayer(hidden_size, 1, in_channels, dtype=dtype, device=device, operations=operations)
6869

6970
def forward(self, x, timestep, context, guidance=None, transformer_options={}, **kwargs):
71+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
72+
self._forward,
73+
self,
74+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
75+
).execute(x, timestep, context, guidance, transformer_options, **kwargs)
76+
77+
def _forward(self, x, timestep, context, guidance=None, transformer_options={}, **kwargs):
7078
x = x.movedim(-1, -2)
7179
timestep = 1.0 - timestep
7280
txt = context

comfy/ldm/hunyuan_video/model.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#Based on Flux code because of weird hunyuan video code license.
22

33
import torch
4+
import comfy.patcher_extension
45
import comfy.ldm.flux.layers
56
import comfy.ldm.modules.diffusionmodules.mmdit
67
from comfy.ldm.modules.attention import optimized_attention
@@ -348,6 +349,13 @@ def img_ids(self, x):
348349
return repeat(img_ids, "t h w c -> b (t h w) c", b=bs)
349350

350351
def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, control=None, transformer_options={}, **kwargs):
352+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
353+
self._forward,
354+
self,
355+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
356+
).execute(x, timestep, context, y, guidance, attention_mask, guiding_frame_index, ref_latent, control, transformer_options, **kwargs)
357+
358+
def _forward(self, x, timestep, context, y, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, control=None, transformer_options={}, **kwargs):
351359
bs, c, t, h, w = x.shape
352360
img_ids = self.img_ids(x)
353361
txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)

comfy/ldm/lightricks/model.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import torch
22
from torch import nn
3+
import comfy.patcher_extension
34
import comfy.ldm.modules.attention
45
import comfy.ldm.common_dit
56
from einops import rearrange
@@ -420,6 +421,13 @@ def __init__(self,
420421
self.patchifier = SymmetricPatchifier(1)
421422

422423
def forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs):
424+
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
425+
self._forward,
426+
self,
427+
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
428+
).execute(x, timestep, context, attention_mask, frame_rate, transformer_options, keyframe_idxs, **kwargs)
429+
430+
def _forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs):
423431
patches_replace = transformer_options.get("patches_replace", {})
424432

425433
orig_shape = list(x.shape)

0 commit comments

Comments
 (0)