From 705a3cb943e57887d1af6f6e02596fa614e52f72 Mon Sep 17 00:00:00 2001 From: FengWen <109639975+ccssu@users.noreply.github.com> Date: Wed, 26 Jun 2024 03:36:36 +0800 Subject: [PATCH] add onediff_comfy_nodes/benchmarks (#956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - [x] ci 报错验证 https://github.com/siliconflow/onediff/actions/runs/9617534217/job/26529505713 --- .github/workflows/examples.yml | 16 +- .gitignore | 1 + onediff_comfy_nodes/_config.py | 2 + onediff_comfy_nodes/_nodes.py | 71 ++--- onediff_comfy_nodes/benchmarks/README.md | 59 ++++ .../benchmarks/resources/prompts.txt | 81 ++++++ .../ipadapter_advanced.json | 180 +++++++++++++ .../ComfyUI_InstantID/instantid_posed.json | 201 ++++++++++++++ .../resources/workflows/baseline/lora.json | 126 +++++++++ .../workflows/baseline/lora_multiple.json | 145 ++++++++++ .../workflows/baseline/sd3_baseline.json | 173 ++++++++++++ .../workflows/example_workflow_api.json | 86 ++++++ .../workflows/nexfort/sd3_unet_speedup.json | 214 +++++++++++++++ .../nexfort/sd3_unet_vae_speedup.json | 255 ++++++++++++++++++ .../ipadapter_advanced.json | 181 +++++++++++++ .../instantid_posed_speedup.json | 202 ++++++++++++++ .../oneflow/lora_multiple_speedup.json | 146 ++++++++++ .../workflows/oneflow/lora_speedup.json | 127 +++++++++ .../oneflow/sdxl-control-lora-speedup.json | 153 +++++++++++ .../benchmarks/scripts/install_env.sh | 31 +++ .../benchmarks/scripts/run_all_tests.sh | 19 ++ .../benchmarks/scripts/run_nexfort_case_ci.sh | 1 + .../benchmarks/scripts/run_oneflow_case_ci.sh | 30 +++ .../scripts/run_oneflow_case_local.sh | 16 ++ .../benchmarks/scripts/run_text_to_image.sh | 9 + .../benchmarks/scripts/text_to_image.py | 153 +++++++++++ .../benchmarks/src/core/__init__.py | 3 + .../benchmarks/src/core/log_utils.py | 52 ++++ .../benchmarks/src/core/registry.py | 31 +++ .../benchmarks/src/core/service_client.py | 203 ++++++++++++++ .../benchmarks/src/input_registration.py | 153 
+++++++++++ .../extras_nodes/nodes_nexfort_booster.py | 4 +- .../extras_nodes/nodes_oneflow_booster.py | 30 +-- onediff_comfy_nodes/modules/booster_cache.py | 60 ++++- .../modules/nexfort/booster_basic.py | 11 +- .../modules/oneflow/__init__.py | 20 ++ .../modules/oneflow/booster_basic.py | 38 ++- .../set_model_patch_replace.py | 6 +- .../oneflow/hijack_pulid_comfyui/__init__.py | 4 + .../oneflow/hijack_pulid_comfyui/_config.py | 25 ++ .../oneflow/hijack_pulid_comfyui/pulid.py | 26 ++ .../register_comfy/CrossAttentionPatch.py | 73 ++++- .../modules/oneflow/utils/graph_path.py | 7 +- .../modules/torch_compile/booster_basic.py | 5 +- .../backends/nexfort/nexfort.py | 5 +- .../backends/oneflow/dual_module.py | 2 + tests/comfy-docker-compose.yml | 2 + tests/comfyui/extra_model_paths.yaml | 9 +- 48 files changed, 3343 insertions(+), 104 deletions(-) create mode 100644 onediff_comfy_nodes/benchmarks/README.md create mode 100644 onediff_comfy_nodes/benchmarks/resources/prompts.txt create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_IPAdapter_plus/ipadapter_advanced.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_InstantID/instantid_posed.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora_multiple.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/baseline/sd3_baseline.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/example_workflow_api.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_speedup.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_vae_speedup.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_IPAdapter_plus/ipadapter_advanced.json create mode 100644 
onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_InstantID/instantid_posed_speedup.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_multiple_speedup.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_speedup.json create mode 100644 onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/sdxl-control-lora-speedup.json create mode 100644 onediff_comfy_nodes/benchmarks/scripts/install_env.sh create mode 100644 onediff_comfy_nodes/benchmarks/scripts/run_all_tests.sh create mode 100644 onediff_comfy_nodes/benchmarks/scripts/run_nexfort_case_ci.sh create mode 100644 onediff_comfy_nodes/benchmarks/scripts/run_oneflow_case_ci.sh create mode 100644 onediff_comfy_nodes/benchmarks/scripts/run_oneflow_case_local.sh create mode 100644 onediff_comfy_nodes/benchmarks/scripts/run_text_to_image.sh create mode 100644 onediff_comfy_nodes/benchmarks/scripts/text_to_image.py create mode 100644 onediff_comfy_nodes/benchmarks/src/core/__init__.py create mode 100644 onediff_comfy_nodes/benchmarks/src/core/log_utils.py create mode 100644 onediff_comfy_nodes/benchmarks/src/core/registry.py create mode 100644 onediff_comfy_nodes/benchmarks/src/core/service_client.py create mode 100644 onediff_comfy_nodes/benchmarks/src/input_registration.py create mode 100644 onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/__init__.py create mode 100644 onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/_config.py create mode 100644 onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/pulid.py diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index ab85e1bfb..c1a6ed3b9 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -282,7 +282,7 @@ jobs: - name: Start ComfyUI Web Service if: matrix.test-suite == 'comfy' run: | - docker exec -w /app/ComfyUI -d ${{ env.CONTAINER_NAME }} sh -c "python3 /app/ComfyUI/main.py --port 8188 
--extra-model-paths-config /src/onediff/tests/comfyui/extra_model_paths.yaml > /app/ComfyUI/onediff_comfyui.log 2>&1" + docker exec -w /app/ComfyUI -d ${{ env.CONTAINER_NAME }} sh -c "python3 /app/ComfyUI/main.py --gpu-only --disable-cuda-malloc --port 8188 --extra-model-paths-config /src/onediff/tests/comfyui/extra_model_paths.yaml > /app/ComfyUI/onediff_comfyui.log 2>&1" sleep 30 # print to check if comfy is launched successfully - run: docker exec ${{ env.CONTAINER_NAME }} ps aux @@ -298,13 +298,15 @@ jobs: false } } - - run_comfy_test "workflows/sdxl-unet-speedup-graph-saver.json" 200 - run_comfy_test "workflows/sdxl-control-lora-speedup.json" 200 - run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/ipadapter_advanced.json" 200 - run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache.json" 600 - run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache-with-lora.json" 800 + # run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache.json" 600 + # run_comfy_test "/share_nfs/hf_models/comfyui_resources/workflows/deep-cache-with-lora.json" 800 # run_comfy_test "workflows/text-to-video-speedup.json" 5000 + docker exec -w /src/onediff/onediff_comfy_nodes/benchmarks ${{ env.CONTAINER_NAME }} bash scripts/install_env.sh /app/ComfyUI + docker exec -w /src/onediff/onediff_comfy_nodes/benchmarks ${{ env.CONTAINER_NAME }} bash scripts/run_all_tests.sh || { + echo "Test fails! print the ComfyUI logs..." 
+ docker exec onediff-test cat /app/ComfyUI/onediff_comfyui.log + false + } - name: Show ComfyUI Log if: matrix.test-suite == 'comfy' diff --git a/.gitignore b/.gitignore index 677317453..d94539607 100644 --- a/.gitignore +++ b/.gitignore @@ -180,3 +180,4 @@ unet_graphs # onediff_sd_webui_extensions onediff_sd_webui_extensions/compiled_caches/ +onediff_comfy_nodes/benchmarks/results/ diff --git a/onediff_comfy_nodes/_config.py b/onediff_comfy_nodes/_config.py index 0ee882e9c..dc1b5a21f 100644 --- a/onediff_comfy_nodes/_config.py +++ b/onediff_comfy_nodes/_config.py @@ -1,5 +1,6 @@ import os import sys +import torch import folder_paths __all__ = [ @@ -8,6 +9,7 @@ "is_disable_oneflow_backend", ] + # https://github.com/comfyanonymous/ComfyUI/blob/master/folder_paths.py#L9 os.environ["COMFYUI_ROOT"] = folder_paths.base_path _default_backend = os.environ.get("ONEDIFF_COMFY_NODES_DEFAULT_BACKEND", "oneflow") diff --git a/onediff_comfy_nodes/_nodes.py b/onediff_comfy_nodes/_nodes.py index ccf695913..e748544b4 100644 --- a/onediff_comfy_nodes/_nodes.py +++ b/onediff_comfy_nodes/_nodes.py @@ -13,10 +13,12 @@ from .modules.oneflow import BasicOneFlowBoosterExecutor BasicBoosterExecutor = BasicOneFlowBoosterExecutor + print("\033[1;31mUsing OneFlow backend\033[0m (Default)") elif is_nexfort_available(): from .modules.nexfort.booster_basic import BasicNexFortBoosterExecutor BasicBoosterExecutor = BasicNexFortBoosterExecutor + print("\033[1;32mUsing Nexfort backend\033[0m (Default)") else: raise RuntimeError( "Neither OneFlow nor Nexfort is available. Please ensure at least one of them is installed." @@ -44,6 +46,7 @@ def speedup( model, inplace: bool = False, custom_booster: Optional[BoosterScheduler] = None, + booster_settings: Optional[BoosterSettings] = None, *args, **kwargs ) -> Tuple: @@ -60,7 +63,7 @@ def speedup( Returns: Tuple: Tuple containing the optimized model. 
""" - if not hasattr(self, "booster_settings"): + if booster_settings is None and not hasattr(self, "booster_settings"): self.booster_settings = BoosterSettings(tmp_cache_key=str(uuid.uuid4())) if custom_booster: @@ -68,7 +71,9 @@ def speedup( booster.inplace = inplace else: booster = BoosterScheduler(BasicBoosterExecutor(), inplace=inplace) - booster.settings = self.booster_settings + booster.settings = ( + self.booster_settings if booster_settings is None else booster_settings + ) return (booster(model, *args, **kwargs),) @@ -93,6 +98,9 @@ def INPUT_TYPES(s): RETURN_TYPES = ("VAE",) + def speedup(self, vae, inplace=False, custom_booster: BoosterScheduler = None): + return super().speedup(vae, inplace, custom_booster) + class ControlnetSpeedup: @classmethod @@ -193,7 +201,7 @@ def onediff_load_controlnet(self, control_net_name, custom_booster=None): return (controlnet,) -class OneDiffCheckpointLoaderSimple(CheckpointLoaderSimple): +class OneDiffCheckpointLoaderSimple(CheckpointLoaderSimple, SpeedupMixin): @classmethod def INPUT_TYPES(s): return { @@ -207,40 +215,35 @@ def INPUT_TYPES(s): CATEGORY = "OneDiff/Loaders" FUNCTION = "onediff_load_checkpoint" - @staticmethod - def _load_checkpoint( - ckpt_name, vae_speedup="disable", custom_booster: BoosterScheduler = None - ): - """Loads a checkpoint, applying speedup techniques.""" - - ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name) - out = comfy.sd.load_checkpoint_guess_config( - ckpt_path, - output_vae=True, - output_clip=True, - embedding_directory=folder_paths.get_folder_paths("embeddings"), - ) - - # Unpack outputs - modelpatcher, clip, vae = out[:3] + def __init__(self) -> None: + super().__init__() + self.unet_booster_settings = BoosterSettings(tmp_cache_key=str(uuid.uuid4())) + self.vae_booster_settings = BoosterSettings(tmp_cache_key=str(uuid.uuid4())) - # Apply custom booster if provided, otherwise use a basic one - custom_booster = custom_booster or BoosterScheduler(BasicBoosterExecutor()) - 
modelpatcher = custom_booster(modelpatcher, ckpt_name=ckpt_name) + @torch.inference_mode() + def onediff_load_checkpoint( + self, ckpt_name, vae_speedup="disable", custom_booster: BoosterScheduler = None, + ): + modelpatcher, clip, vae = self.load_checkpoint(ckpt_name) + modelpatcher = self.speedup( + modelpatcher, + inplace=True, + custom_booster=custom_booster, + booster_settings=self.unet_booster_settings, + )[0] - # Apply VAE speedup if enabled if vae_speedup == "enable": - vae = BoosterScheduler(BasicBoosterExecutor())(vae, ckpt_name=ckpt_name) + vae = self.speedup( + vae, + inplace=True, + custom_booster=custom_booster, + booster_settings=self.vae_booster_settings, + )[0] # Set weight inplace update modelpatcher.weight_inplace_update = True - - return modelpatcher, clip, vae - - @torch.inference_mode() - def onediff_load_checkpoint( - self, ckpt_name, vae_speedup="disable", custom_booster: BoosterScheduler = None, - ): - out = self._load_checkpoint(ckpt_name, vae_speedup, custom_booster) - # Return the loaded checkpoint (modelpatcher, clip, vae) - return out + return ( + modelpatcher, + clip, + vae, + ) diff --git a/onediff_comfy_nodes/benchmarks/README.md b/onediff_comfy_nodes/benchmarks/README.md new file mode 100644 index 000000000..d3435167e --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/README.md @@ -0,0 +1,59 @@ +## Environment setup +### Set up ComfyUI +https://github.com/comfyanonymous/ComfyUI + +### Set up onediff_comfy_nodes +https://github.com/siliconflow/onediff?tab=readme-ov-file#installation + +```shell +# python 3.10 +git clone https://github.com/siliconflow/onediff.git +cd onediff && pip install -e . 
+ln -s $(pwd)/onediff_comfy_nodes path/to/ComfyUI/custom_nodes/ +# or +# cp -r onediff_comfy_nodes path/to/ComfyUI/custom_nodes/ +``` +### Set up nexfort backend +https://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort + +## Getting Started +### Run ComfyUI +Note ⚠️: Replace 'path/to/' with the actual path to the directories and files on your system. +```shell +export COMFYUI_ROOT=path/to/ComfyUI + +cd path/to/onediff/onediff_comfy_nodes/benchmarks + +bash scripts/install_env.sh $COMFYUI_ROOT + +cd $COMFYUI_ROOT + +python main.py --gpu-only --port 8188 --extra-model-paths-config path/to/onediff/tests/comfyui/extra_model_paths.yaml +``` + +## Usage Example + +```shell +cd path/to/onediff/onediff_comfy_nodes/benchmarks + +bash scripts/run_text_to_image.sh +``` + +The output results will be saved in the results/ directory. + +## How to add a workflow for testing +To add a workflow for testing, you can refer to the `run_text_to_image.sh` script and the `input_registration.py` file in the `src` directory. Here's an example of how to register a workflow generator: + +```python +# file: onediff/onediff_comfy_nodes/benchmarks/src/input_registration.py + +@register_generator(f"{WORKFLOW_DIR}/example_workflow_api.json") +def _(workflow_path, *args, **kwargs): + with open(workflow_path, "r") as fp: + workflow = json.load(fp) + graph = ComfyGraph(graph=workflow, sampler_nodes=["3"]) + for height in [1024, 768, 512]: + for width in [1024, 768, 512]: + graph.set_image_size(height=height, width=width) + yield InputParams(graph=graph) +``` diff --git a/onediff_comfy_nodes/benchmarks/resources/prompts.txt b/onediff_comfy_nodes/benchmarks/resources/prompts.txt new file mode 100644 index 000000000..6ed7d8697 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/prompts.txt @@ -0,0 +1,81 @@ +universe,stars,moon +Morning fog over sleepy village. 
+A group of giraffes eating leaves,sweetveld +in style of Kawanabe Kyosai, beautiful details +a dog,hold hot dog,outdoors,grass +A pickup truck going up a mountain switchback +texture of marble, top down close-up, video game +robot droids, in the desert , colorful, dutch angle +Man snowboarding on the mars, ultra high resolution 8k +ink cartoon frazzled cute cat, nervous cat, white background +texture of wood bark, top down close-up, video game +1boy,underwater,green eyes,white skirt,looking at viewer +A vibrant tropical rainforest, cold color palette, muted colors, detailed +Fluffy Samoye, smile, holding a toy ball in his mouth +in style of Alphonso Mucha , character concept design, half body +Cyberpunk style,urban,1 robot,an electronic screen with “Khazix” +in style of E.H. Shepard , character, ink art, side view +breathtaking night street of Tokyo, neon lights. award-winning, professional, highly detailed +concept art {prompt}. digital artwork, illustrative, painterly, matte painting, highly detailed +16-bit pixel art, a cozy cafe side view, a beautiful day +Byzantine Mosaic Art of a single purple flower in a withe vase. +A cute cat with a sign saying "Go Big or Go Home". +a cute cat with a sign saying "Go Big or Go Home" +A landscape photo of Iceland, with aurora, snow, ice and erupting lava +Fauvist Depiction of a Sunlit Village with Simplified Forms and Intense Color Contrasts. +A majestic lion stands proudly on a rock, overlooking the vast African savannah +anime artwork an empty classroom. anime style, key visual, vibrant, studio anime, highly detailed +claymation style captain jack sparrow on tropical island. sculpture, clay art, centered composition, play-doh +claymation style captain jack sparrow on tropical island. sculpture, clay art, centered composition, play-doh +isometric pixel-art of wizard working on spells. 
low-res, blocky, pixel art style, 16-bit graphics +a latina woman with a pearl earring,best quality,masterpiece,ultra detailed,UHD 4K,photographic +isometric style farmhouse from RPG game, unreal engine, vibrant, beautiful, crisp, detailed, ultra detailed, intricate +professional 3d model bulky purple mecha with missiles. octane render, highly detailed, volumetric, dramatic lighting +The 90s, a beautiful woman with a radiant smile and long hair, dressed in summer attire +A dolphin leaps through the waves, set against a backdrop of bright blues and teal hues +breathtaking selfie photograph of astronaut floating in space, earth in the background. award-winning, professional, highly detailed +origami style Winterfell. paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition +line art of a kitchen in perspective. professional, sleek, modern, minimalist, graphic, line art, vector graphics +High resolution HDR photograph of a broken heart, crying by artist "Volko Merschky", by artist"Simone Pfaff". +A cute rusty robot made if garbage with a panel "I'M SD3-BASED" above it, an evil pupeeter. +a female character design, short hair wearing headphones, background in the city,Pixel style, transparent pvc jacket +cinematic photo of a woman sitting at a cafe. 35mm photograph, film, bokeh, professional, 4k, highly detailed +a man wearing a corduroy outfit at a fashion photoshoot, best quality,masterpiece,ultra detailed,UHD 4K,photographic +anime artwork a girl looking at the sea, dramatic, anime style, key visual, vibrant, studio anime, highly detailed +concept art of dragon flying over town, clouds. digital artwork, illustrative, painterly, matte painting, highly detailed, cinematic composition +cinematic photo of a construction worker looking down at city. 35mm photograph, film, bokeh, professional, 4k, highly detailed +Anime girl, a beautiful warrior with flowing silver hair stands in the midst of a battle - ravaged landscape. 
+vaporwave synthwave style Los Angeles street. cyberpunk, neon, vibes, stunningly beautiful, crisp, detailed, sleek, ultramodern, high contrast, cinematic composition +low-poly style tropical island with clouds, ambient occlusion . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition +a cat,a destroyed badly damaged space ship,beautiful beach,broken windows, grass and flowers grow around,sunny,ocean +top down shot of a miniature cottage with garden, delicate details, vivid scene, lovely style, cute, sweet, soft atmosphere, Microlandscape +angel prays in the red shore flower, in the style of David Hettinger, white and red, Dmitry Kustanovich, vibrant statues +Vincent Willem van Gogh,Decorate with bright and exaggerated col,High brightness, high purity, and high brightness colors,Modern and fashionable color decoration +ethereal fantasy concept art of sorceress casting spells. magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy +ethereal fantasy concept art of thunder god with hammer. magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy +An ornate, Victorian-era key lying on a weathered, wooden surface, with intricate, steampunk-inspired gears and mechanisms visible within its transparent, glass shaft. +A quirky, steampunk robot with brass gears and a top hat, serving tea in a Victorian parlor, captured in a whimsical photorealistic style. +A beautiful painting of flowing colors and styles forming the words “SD3 is coming!”, the background is speckled with drops and splashes of paint. +In the shadow of the last sun, a fisherman had fallen asleep, and he had a furrow along his face, like a sort of smile. +cinematic film still, stormtrooper taking aim. 
shallow depth of field, vignette, highly detailed, high budget Hollywood movie, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy +Inside the cockpit of an airplane during sunset, a futuristic city is visible in the distance. A faint crescent moon can be seen in the sky. +analog film photo of old woman on the streets of london . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage +Photo of a bear wearing a suit and tophat in a river in the middle of a forest holding a sign that says"I can't bear it". +An anthopomorphic pink donut with a mustache and cowboy hat standing by a log cabin in a forest with an old 1970s orange truck in the driveway. +An elegant and feminine packaging design for a luxury, scented candle collection, with a soft, pastel color palette, a frosted glass jar, and a delicate, floral-patterned box. +Awesome artwork of a wizard on the top of a mountain, he's creating the big text "Stable Diffusion 3 API" with magic, magic text, at dawn, sunrise. +vaporwave style of porsche driving through tokyo. cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional +A brooding male character with tousled black hair and a cape stands with a determined expression, backlit by fiery, swirling brushstrokes of orange and yellow, creating a dramatic and intense mood. +A dark-armored warrior with ornate golden details, cloaked in a flowing black cape, wielding a radiant, fiery sword, standing amidst an ominous cloudy backdrop with dramatic lighting, exuding a menacing, powerful presence. 
+Side angle of a Chinese woman wearing a black coat, 25 years old, long straight black hair, street shot, in winter, dynamic blur, black and white, retro, side shot, side, panorama, HD, 16K +((anime style)),1girl, indoors, sitting on the sofa, living room, pink hair, blue eyes, from back, from above, face towards viewer, playing video games, holding controller, white shirt, short, parted lips, anime production +A black and sleek robot is looking at a glass blue orb full of electrical current hovering in front of him. The robot has red glowing eyes. The background has a soft glowing red light. +Cartoon hand, little girl, long colored hair, holographic coat, white shorts, fair skin, blue eyes, white sneakers, full-body photo, full body, panorama, best quality, best picture quality, black highly reflective background cloth, movie-level lighting effect +Masterpiece, best quality, girl, having a tattoo that says "Welcome to SiliconFlow". collarbone, wavy hair, looking at viewer, blurry foreground, upper body, necklace, contemporary, plain pants, intricate, print, pattern, ponytail, red hair, dappled sunlight, smile, happy. +A classical painting of a gothic black knight standing in the middle of a field of white flowers. Tall trees are in the distance. The lighting is somber and dramatic, during the golden hour of the day. +A portrait painting of a handsome man with dark hair and dreamy green eyes. His hair is dark brown, curly, and short. The background is dark and moody with hints of red. The lighting is somber and dramatic. 
+line art, line style, 1girl, solo, japanese clothes, hand fan, kimono, black hair, paper fan, outdoors, holding, holding fan, upper body, floral print, short hair, black eyes, brown eyes, black kimono, yukata, blue kimono, uchiwa, closed mouth, sash, expressionless, medium hair +A boy with a sword in his hand takes a fighting stance on the high street with a sword slashed at the camera, close-up, pov view, first person, blue whirlwind flame, glow, surrealism, ultra-futurism, cyberpunk, 3D art, rich detail, best quality, centered +Anime girl, masterpiece, best quality, hatsune miku, white gown, angel, angel wings, golden halo, dark background, upper body, closed mouth, looking at viewer, arms behind back, blue theme, night, highres, 4k, 8k, intricate detail, cinematic lighting, amazing quality, amazing shading, soft lighting, detailed Illustration, anime style, wallpaper. +Super close-up shot of a weathered male skull almost buried by sand,side view,a fresh plant with two green leaves growing from the skull,detailed texture,shot from the botton,epic,super photorealism,cinematic,scenery,sunset,wasteland,desert,dune wave,super-detailed,highly realistic,8k,artistic,contrast lighting,vibrant color,hdr,erode +a (side view close-up half-body:1.85) fashion photoshoot photo of darth vader wearing a pink and white diamond studded outfit, his chest has a (very big CRT screen showing a pacman game:1.7), his helmet is made of a hello kitty themed white plastic, his helmet has sticker decals on it \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_IPAdapter_plus/ipadapter_advanced.json b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_IPAdapter_plus/ipadapter_advanced.json new file mode 100644 index 000000000..5aa32dbeb --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_IPAdapter_plus/ipadapter_advanced.json @@ -0,0 +1,180 @@ +{ + "3": { + "inputs": { + "seed": 1, 
+ "steps": 30, + "cfg": 6.5, + "sampler_name": "ddpm", + "scheduler": "karras", + "denoise": 1, + "model": [ + "14", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "5", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "4": { + "inputs": { + "ckpt_name": "sd15/020.realisticVisionV51_v51VAE.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "5": { + "inputs": { + "width": 512, + "height": 512, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "6": { + "inputs": { + "text": "in a peaceful spring morning a woman wearing a white shirt is sitting in a park on a bench\n\nhigh quality, detailed, diffuse light", + "clip": [ + "4", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "blurry, noisy, messy, lowres, jpeg, artifacts, ill, distorted, malformed", + "clip": [ + "4", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "4", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "9": { + "inputs": { + "filename_prefix": "IPAdapter", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "12": { + "inputs": { + "image": "input_image_vermeer.png", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "14": { + "inputs": { + "weight": 0.8, + "weight_type": "linear", + "combine_embeds": "concat", + "start_at": 0, + "end_at": 1, + "embeds_scaling": "V only", + "model": [ + "4", + 0 + ], + "ipadapter": [ + "15", + 0 + ], + "image": [ + "17", + 0 + ], + "clip_vision": [ + "16", + 0 + ] + }, + "class_type": 
"IPAdapterAdvanced", + "_meta": { + "title": "IPAdapter Advanced" + } + }, + "15": { + "inputs": { + "ipadapter_file": "ip-adapter-plus_sd15.safetensors" + }, + "class_type": "IPAdapterModelLoader", + "_meta": { + "title": "IPAdapter Model Loader" + } + }, + "16": { + "inputs": { + "clip_name": "SD1.5/pytorch_model.bin" + }, + "class_type": "CLIPVisionLoader", + "_meta": { + "title": "Load CLIP Vision" + } + }, + "17": { + "inputs": { + "interpolation": "LANCZOS", + "crop_position": "top", + "sharpening": 0.15, + "image": [ + "12", + 0 + ] + }, + "class_type": "PrepImageForClipVision", + "_meta": { + "title": "Prep Image For ClipVision" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_InstantID/instantid_posed.json b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_InstantID/instantid_posed.json new file mode 100644 index 000000000..4e6d387a6 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/ComfyUI_InstantID/instantid_posed.json @@ -0,0 +1,201 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 30, + "cfg": 4.5, + "sampler_name": "ddpm", + "scheduler": "karras", + "denoise": 1, + "model": [ + "60", + 0 + ], + "positive": [ + "60", + 1 + ], + "negative": [ + "60", + 2 + ], + "latent_image": [ + "69", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "71", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "11": { + "inputs": { + "instantid_file": "ip-adapter.bin" + }, + "class_type": "InstantIDModelLoader", + "_meta": { + "title": "Load InstantID Model" + } + }, + "13": { + "inputs": { + "image": "daydreaming.jpg", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "15": { + "inputs": { + "images": [ + "8", + 0 + ] + }, + "class_type": "PreviewImage", + 
"_meta": { + "title": "Preview Image" + } + }, + "16": { + "inputs": { + "control_net_name": "instantid/diffusion_pytorch_model.safetensors" + }, + "class_type": "ControlNetLoader", + "_meta": { + "title": "Load ControlNet Model" + } + }, + "38": { + "inputs": { + "provider": "CPU" + }, + "class_type": "InstantIDFaceAnalysis", + "_meta": { + "title": "InstantID Face Analysis" + } + }, + "39": { + "inputs": { + "text": "comic character. graphic illustration, comic art, graphic novel art, vibrant, highly detailed", + "clip": [ + "71", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "40": { + "inputs": { + "text": "photograph, deformed, glitch, noisy, realistic, stock photo", + "clip": [ + "71", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "60": { + "inputs": { + "weight": 0.8, + "start_at": 0, + "end_at": 1, + "instantid": [ + "11", + 0 + ], + "insightface": [ + "38", + 0 + ], + "control_net": [ + "16", + 0 + ], + "image": [ + "13", + 0 + ], + "model": [ + "71", + 0 + ], + "positive": [ + "39", + 0 + ], + "negative": [ + "40", + 0 + ], + "image_kps": [ + "67", + 0 + ] + }, + "class_type": "ApplyInstantID", + "_meta": { + "title": "Apply InstantID" + } + }, + "67": { + "inputs": { + "image": "example.png", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "69": { + "inputs": { + "pixels": [ + "67", + 0 + ], + "vae": [ + "71", + 2 + ] + }, + "class_type": "VAEEncode", + "_meta": { + "title": "VAE Encode" + } + }, + "71": { + "inputs": { + "ckpt_name": "sdxl/dreamshaperXL_v21TurboDPMSDE.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora.json b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora.json new file mode 
100644 index 000000000..16afa17cc --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora.json @@ -0,0 +1,126 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 20, + "cfg": 8, + "sampler_name": "euler", + "scheduler": "normal", + "denoise": 1, + "model": [ + "10", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "5", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "4": { + "inputs": { + "ckpt_name": "v1-5-pruned-emaonly.ckpt" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "5": { + "inputs": { + "width": 512, + "height": 768, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "6": { + "inputs": { + "text": "masterpiece best quality girl, hanfu", + "clip": [ + "10", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "bad hands", + "clip": [ + "10", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "4", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "9": { + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "10": { + "inputs": { + "lora_name": "sd1.5/hanfu.safetensors", + "strength_model": 0.5, + "strength_clip": 0.5, + "model": [ + "4", + 0 + ], + "clip": [ + "4", + 1 + ] + }, + "class_type": "LoraLoader", + "_meta": { + "title": "Load LoRA" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora_multiple.json b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora_multiple.json new file mode 100644 index 
000000000..a42d187fe --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/lora_multiple.json @@ -0,0 +1,145 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 20, + "cfg": 8, + "sampler_name": "euler", + "scheduler": "normal", + "denoise": 1, + "model": [ + "10", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "5", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "4": { + "inputs": { + "ckpt_name": "sd15/020.realisticVisionV51_v51VAE.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "5": { + "inputs": { + "width": 512, + "height": 768, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "6": { + "inputs": { + "text": "masterpiece best quality girl, hanfu", + "clip": [ + "10", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "bad hands", + "clip": [ + "10", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "4", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "9": { + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "10": { + "inputs": { + "lora_name": "sd1.5/hanfu.safetensors", + "strength_model": 0.5, + "strength_clip": 0.5, + "model": [ + "11", + 0 + ], + "clip": [ + "11", + 1 + ] + }, + "class_type": "LoraLoader", + "_meta": { + "title": "Load LoRA" + } + }, + "11": { + "inputs": { + "lora_name": "sd1.5/.blindbox.safetensors", + "strength_model": 0.5, + "strength_clip": 0.5, + "model": [ + "4", + 0 + ], + "clip": [ + "4", + 1 + ] + }, + "class_type": "LoraLoader", + 
"_meta": { + "title": "Load LoRA" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/sd3_baseline.json b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/sd3_baseline.json new file mode 100644 index 000000000..196f9bb1e --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/baseline/sd3_baseline.json @@ -0,0 +1,173 @@ +{ + "6": { + "inputs": { + "text": "a female character with long, flowing hair that appears to be made of ethereal, swirling patterns resembling the Northern Lights or Aurora Borealis. The background is dominated by deep blues and purples, creating a mysterious and dramatic atmosphere. The character's face is serene, with pale skin and striking features. She wears a dark-colored outfit with subtle patterns. The overall style of the artwork is reminiscent of fantasy or supernatural genres", + "clip": [ + "11", + 0 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "11": { + "inputs": { + "clip_name1": "clip_g.safetensors", + "clip_name2": "clip_l.safetensors", + "clip_name3": "t5xxl_fp8_e4m3fn.safetensors" + }, + "class_type": "TripleCLIPLoader", + "_meta": { + "title": "TripleCLIPLoader" + } + }, + "67": { + "inputs": { + "conditioning": [ + "71", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "68": { + "inputs": { + "start": 0.1, + "end": 1, + "conditioning": [ + "67", + 0 + ] + }, + "class_type": "ConditioningSetTimestepRange", + "_meta": { + "title": "ConditioningSetTimestepRange" + } + }, + "69": { + "inputs": { + "conditioning_1": [ + "68", + 0 + ], + "conditioning_2": [ + "70", + 0 + ] + }, + "class_type": "ConditioningCombine", + "_meta": { + "title": "Conditioning (Combine)" + } + }, + "70": { + "inputs": { + "start": 0, + "end": 0.1, + "conditioning": [ + "71", + 0 + ] + }, + "class_type": "ConditioningSetTimestepRange", + "_meta": { + 
"title": "ConditioningSetTimestepRange" + } + }, + "71": { + "inputs": { + "text": "bad quality, poor quality, doll, disfigured, jpg, toy, bad anatomy, missing limbs, missing fingers, 3d, cgi", + "clip": [ + "11", + 0 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Negative Prompt)" + } + }, + "135": { + "inputs": { + "width": 768, + "height": 512, + "batch_size": 1 + }, + "class_type": "EmptySD3LatentImage", + "_meta": { + "title": "EmptySD3LatentImage" + } + }, + "233": { + "inputs": { + "images": [ + "282", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "252": { + "inputs": { + "ckpt_name": "sd3_medium.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "271": { + "inputs": { + "seed": 1, + "steps": 28, + "cfg": 4.5, + "sampler_name": "dpmpp_2m", + "scheduler": "sgm_uniform", + "denoise": 1, + "model": [ + "252", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "69", + 0 + ], + "latent_image": [ + "135", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "282": { + "inputs": { + "samples": [ + "271", + 0 + ], + "vae": [ + "252", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/example_workflow_api.json b/onediff_comfy_nodes/benchmarks/resources/workflows/example_workflow_api.json new file mode 100644 index 000000000..a487d5cb1 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/example_workflow_api.json @@ -0,0 +1,86 @@ +{ + "3": { + "class_type": "KSampler", + "inputs": { + "cfg": 8, + "denoise": 1, + "latent_image": [ + "5", + 0 + ], + "model": [ + "4", + 0 + ], + "negative": [ + "7", + 0 + ], + "positive": [ + "6", + 0 + ], + "sampler_name": "euler", + "scheduler": "normal", + "seed": 1, + "steps": 20 + } + }, + "4": { 
+ "class_type": "CheckpointLoaderSimple", + "inputs": { + "ckpt_name": "sd_xl_base_1.0.safetensors" + } + }, + "5": { + "class_type": "EmptyLatentImage", + "inputs": { + "batch_size": 1, + "height": 512, + "width": 512 + } + }, + "6": { + "class_type": "CLIPTextEncode", + "inputs": { + "clip": [ + "4", + 1 + ], + "text": "masterpiece best quality girl" + } + }, + "7": { + "class_type": "CLIPTextEncode", + "inputs": { + "clip": [ + "4", + 1 + ], + "text": "bad hands" + } + }, + "8": { + "class_type": "VAEDecode", + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "4", + 2 + ] + } + }, + "9": { + "class_type": "SaveImage", + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "8", + 0 + ] + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_speedup.json b/onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_speedup.json new file mode 100644 index 000000000..530b022fc --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_speedup.json @@ -0,0 +1,214 @@ +{ + "6": { + "inputs": { + "text": "a female character with long, flowing hair that appears to be made of ethereal, swirling patterns resembling the Northern Lights or Aurora Borealis. The background is dominated by deep blues and purples, creating a mysterious and dramatic atmosphere. The character's face is serene, with pale skin and striking features. She wears a dark-colored outfit with subtle patterns. 
The overall style of the artwork is reminiscent of fantasy or supernatural genres", + "clip": [ + "11", + 0 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "11": { + "inputs": { + "clip_name1": "clip_g.safetensors", + "clip_name2": "clip_l.safetensors", + "clip_name3": "t5xxl_fp8_e4m3fn.safetensors" + }, + "class_type": "TripleCLIPLoader", + "_meta": { + "title": "TripleCLIPLoader" + } + }, + "67": { + "inputs": { + "conditioning": [ + "71", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "68": { + "inputs": { + "start": 0.1, + "end": 1, + "conditioning": [ + "67", + 0 + ] + }, + "class_type": "ConditioningSetTimestepRange", + "_meta": { + "title": "ConditioningSetTimestepRange" + } + }, + "69": { + "inputs": { + "conditioning_1": [ + "68", + 0 + ], + "conditioning_2": [ + "70", + 0 + ] + }, + "class_type": "ConditioningCombine", + "_meta": { + "title": "Conditioning (Combine)" + } + }, + "70": { + "inputs": { + "start": 0, + "end": 0.1, + "conditioning": [ + "71", + 0 + ] + }, + "class_type": "ConditioningSetTimestepRange", + "_meta": { + "title": "ConditioningSetTimestepRange" + } + }, + "71": { + "inputs": { + "text": "bad quality, poor quality, doll, disfigured, jpg, toy, bad anatomy, missing limbs, missing fingers, 3d, cgi", + "clip": [ + "11", + 0 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Negative Prompt)" + } + }, + "135": { + "inputs": { + "width": 768, + "height": 512, + "batch_size": 1 + }, + "class_type": "EmptySD3LatentImage", + "_meta": { + "title": "EmptySD3LatentImage" + } + }, + "233": { + "inputs": { + "images": [ + "282", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "252": { + "inputs": { + "ckpt_name": "sd3_medium.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "271": { + 
"inputs": { + "seed": 1, + "steps": 28, + "cfg": 4.5, + "sampler_name": "dpmpp_2m", + "scheduler": "sgm_uniform", + "denoise": 1, + "model": [ + "283", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "69", + 0 + ], + "latent_image": [ + "135", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "277": { + "inputs": { + "torchcompile_booster": [ + "284", + 0 + ] + }, + "class_type": "OneDiffModelBooster", + "_meta": { + "title": "Apply Model Booster - OneDiff" + } + }, + "282": { + "inputs": { + "samples": [ + "271", + 0 + ], + "vae": [ + "252", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "283": { + "inputs": { + "inplace": true, + "model": [ + "252", + 0 + ], + "custom_booster": [ + "277", + 0 + ] + }, + "class_type": "ModelSpeedup", + "_meta": { + "title": "Model Speedup" + } + }, + "284": { + "inputs": { + "fullgraph": false, + "dynamic": true, + "mode": "max-optimize:max-autotune:low-precision", + "docs_link": "[Note]: \nInstall-nexfort \nhttps://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort#install-nexfort" + }, + "class_type": "OneDiffNexfortBooster", + "_meta": { + "title": "Nexfort Booster - OneDiff" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_vae_speedup.json b/onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_vae_speedup.json new file mode 100644 index 000000000..586aad7b2 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/nexfort/sd3_unet_vae_speedup.json @@ -0,0 +1,255 @@ +{ + "6": { + "inputs": { + "text": "a female character with long, flowing hair that appears to be made of ethereal, swirling patterns resembling the Northern Lights or Aurora Borealis. The background is dominated by deep blues and purples, creating a mysterious and dramatic atmosphere. 
The character's face is serene, with pale skin and striking features. She wears a dark-colored outfit with subtle patterns. The overall style of the artwork is reminiscent of fantasy or supernatural genres", + "clip": [ + "11", + 0 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "11": { + "inputs": { + "clip_name1": "clip_g.safetensors", + "clip_name2": "clip_l.safetensors", + "clip_name3": "t5xxl_fp8_e4m3fn.safetensors" + }, + "class_type": "TripleCLIPLoader", + "_meta": { + "title": "TripleCLIPLoader" + } + }, + "67": { + "inputs": { + "conditioning": [ + "71", + 0 + ] + }, + "class_type": "ConditioningZeroOut", + "_meta": { + "title": "ConditioningZeroOut" + } + }, + "68": { + "inputs": { + "start": 0.1, + "end": 1, + "conditioning": [ + "67", + 0 + ] + }, + "class_type": "ConditioningSetTimestepRange", + "_meta": { + "title": "ConditioningSetTimestepRange" + } + }, + "69": { + "inputs": { + "conditioning_1": [ + "68", + 0 + ], + "conditioning_2": [ + "70", + 0 + ] + }, + "class_type": "ConditioningCombine", + "_meta": { + "title": "Conditioning (Combine)" + } + }, + "70": { + "inputs": { + "start": 0, + "end": 0.1, + "conditioning": [ + "71", + 0 + ] + }, + "class_type": "ConditioningSetTimestepRange", + "_meta": { + "title": "ConditioningSetTimestepRange" + } + }, + "71": { + "inputs": { + "text": "bad quality, poor quality, doll, disfigured, jpg, toy, bad anatomy, missing limbs, missing fingers, 3d, cgi", + "clip": [ + "11", + 0 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Negative Prompt)" + } + }, + "135": { + "inputs": { + "width": 768, + "height": 512, + "batch_size": 1 + }, + "class_type": "EmptySD3LatentImage", + "_meta": { + "title": "EmptySD3LatentImage" + } + }, + "233": { + "inputs": { + "images": [ + "282", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "252": { + "inputs": { + "ckpt_name": 
"sd3_medium.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "271": { + "inputs": { + "seed": 1, + "steps": 28, + "cfg": 4.5, + "sampler_name": "dpmpp_2m", + "scheduler": "sgm_uniform", + "denoise": 1, + "model": [ + "285", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "69", + 0 + ], + "latent_image": [ + "135", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "282": { + "inputs": { + "samples": [ + "271", + 0 + ], + "vae": [ + "287", + 0 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "285": { + "inputs": { + "inplace": true, + "model": [ + "252", + 0 + ], + "custom_booster": [ + "288", + 0 + ] + }, + "class_type": "ModelSpeedup", + "_meta": { + "title": "Model Speedup" + } + }, + "287": { + "inputs": { + "inplace": true, + "vae": [ + "252", + 2 + ], + "custom_booster": [ + "290", + 0 + ] + }, + "class_type": "VaeSpeedup", + "_meta": { + "title": "VAE Speedup" + } + }, + "288": { + "inputs": { + "torchcompile_booster": [ + "289", + 0 + ] + }, + "class_type": "OneDiffModelBooster", + "_meta": { + "title": "Apply Model Booster - OneDiff" + } + }, + "289": { + "inputs": { + "fullgraph": false, + "dynamic": true, + "mode": "max-optimize:max-autotune:low-precision", + "docs_link": "[Note]: \nInstall-nexfort \nhttps://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort#install-nexfort" + }, + "class_type": "OneDiffNexfortBooster", + "_meta": { + "title": "Nexfort Booster - OneDiff" + } + }, + "290": { + "inputs": { + "torchcompile_booster": [ + "291", + 0 + ] + }, + "class_type": "OneDiffModelBooster", + "_meta": { + "title": "Apply Model Booster - OneDiff" + } + }, + "291": { + "inputs": { + "fullgraph": false, + "dynamic": true, + "mode": "max-autotune:benchmark:low-precision", + "docs_link": "[Note]: \nInstall-nexfort 
\nhttps://github.com/siliconflow/onediff/tree/main/src/onediff/infer_compiler/backends/nexfort#install-nexfort" + }, + "class_type": "OneDiffNexfortBooster", + "_meta": { + "title": "Nexfort Booster - OneDiff" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_IPAdapter_plus/ipadapter_advanced.json b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_IPAdapter_plus/ipadapter_advanced.json new file mode 100644 index 000000000..e4bf7fe71 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_IPAdapter_plus/ipadapter_advanced.json @@ -0,0 +1,181 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 30, + "cfg": 6.5, + "sampler_name": "ddpm", + "scheduler": "karras", + "denoise": 1, + "model": [ + "14", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "5", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "5": { + "inputs": { + "width": 512, + "height": 512, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "6": { + "inputs": { + "text": "in a peaceful spring morning a woman wearing a white shirt is sitting in a park on a bench\n\nhigh quality, detailed, diffuse light", + "clip": [ + "18", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "blurry, noisy, messy, lowres, jpeg, artifacts, ill, distorted, malformed", + "clip": [ + "18", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "18", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "9": { + "inputs": { + "filename_prefix": "IPAdapter", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + 
"_meta": { + "title": "Save Image" + } + }, + "12": { + "inputs": { + "image": "sam_resize.png", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "14": { + "inputs": { + "weight": 0.8, + "weight_type": "linear", + "combine_embeds": "concat", + "start_at": 0, + "end_at": 1, + "embeds_scaling": "V only", + "model": [ + "18", + 0 + ], + "ipadapter": [ + "15", + 0 + ], + "image": [ + "17", + 0 + ], + "clip_vision": [ + "16", + 0 + ] + }, + "class_type": "IPAdapterAdvanced", + "_meta": { + "title": "IPAdapter Advanced" + } + }, + "15": { + "inputs": { + "ipadapter_file": "ip-adapter-plus_sd15.safetensors" + }, + "class_type": "IPAdapterModelLoader", + "_meta": { + "title": "IPAdapter Model Loader" + } + }, + "16": { + "inputs": { + "clip_name": "SD1.5/pytorch_model.bin" + }, + "class_type": "CLIPVisionLoader", + "_meta": { + "title": "Load CLIP Vision" + } + }, + "17": { + "inputs": { + "interpolation": "LANCZOS", + "crop_position": "top", + "sharpening": 0.15, + "image": [ + "12", + 0 + ] + }, + "class_type": "PrepImageForClipVision", + "_meta": { + "title": "Prep Image For ClipVision" + } + }, + "18": { + "inputs": { + "ckpt_name": "sd15/020.realisticVisionV51_v51VAE.safetensors", + "vae_speedup": "disable" + }, + "class_type": "OneDiffCheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint - OneDiff" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_InstantID/instantid_posed_speedup.json b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_InstantID/instantid_posed_speedup.json new file mode 100644 index 000000000..f04381895 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/ComfyUI_InstantID/instantid_posed_speedup.json @@ -0,0 +1,202 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 30, + "cfg": 4.5, + "sampler_name": "ddpm", + "scheduler": "karras", + "denoise": 1, + "model": [ + "60", + 
0 + ], + "positive": [ + "60", + 1 + ], + "negative": [ + "60", + 2 + ], + "latent_image": [ + "69", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "70", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "11": { + "inputs": { + "instantid_file": "ip-adapter.bin" + }, + "class_type": "InstantIDModelLoader", + "_meta": { + "title": "Load InstantID Model" + } + }, + "13": { + "inputs": { + "image": "daydreaming.jpg", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "15": { + "inputs": { + "images": [ + "8", + 0 + ] + }, + "class_type": "PreviewImage", + "_meta": { + "title": "Preview Image" + } + }, + "16": { + "inputs": { + "control_net_name": "instantid/diffusion_pytorch_model.safetensors" + }, + "class_type": "ControlNetLoader", + "_meta": { + "title": "Load ControlNet Model" + } + }, + "38": { + "inputs": { + "provider": "CPU" + }, + "class_type": "InstantIDFaceAnalysis", + "_meta": { + "title": "InstantID Face Analysis" + } + }, + "39": { + "inputs": { + "text": "comic character. 
graphic illustration, comic art, graphic novel art, vibrant, highly detailed", + "clip": [ + "70", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "40": { + "inputs": { + "text": "photograph, deformed, glitch, noisy, realistic, stock photo", + "clip": [ + "70", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "60": { + "inputs": { + "weight": 0.8, + "start_at": 0, + "end_at": 1, + "instantid": [ + "11", + 0 + ], + "insightface": [ + "38", + 0 + ], + "control_net": [ + "16", + 0 + ], + "image": [ + "13", + 0 + ], + "model": [ + "70", + 0 + ], + "positive": [ + "39", + 0 + ], + "negative": [ + "40", + 0 + ], + "image_kps": [ + "67", + 0 + ] + }, + "class_type": "ApplyInstantID", + "_meta": { + "title": "Apply InstantID" + } + }, + "67": { + "inputs": { + "image": "example.png", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "69": { + "inputs": { + "pixels": [ + "67", + 0 + ], + "vae": [ + "70", + 2 + ] + }, + "class_type": "VAEEncode", + "_meta": { + "title": "VAE Encode" + } + }, + "70": { + "inputs": { + "ckpt_name": "sdxl/dreamshaperXL_v21TurboDPMSDE.safetensors", + "vae_speedup": "disable" + }, + "class_type": "OneDiffCheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint - OneDiff" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_multiple_speedup.json b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_multiple_speedup.json new file mode 100644 index 000000000..77002cb59 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_multiple_speedup.json @@ -0,0 +1,146 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 20, + "cfg": 8, + "sampler_name": "euler", + "scheduler": "normal", + "denoise": 1, + "model": [ + "10", + 0 + ], + "positive": [ + "6", + 0 + ], + 
"negative": [ + "7", + 0 + ], + "latent_image": [ + "5", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "5": { + "inputs": { + "width": 512, + "height": 768, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "6": { + "inputs": { + "text": "masterpiece best quality girl, hanfu", + "clip": [ + "10", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "bad hands", + "clip": [ + "10", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "13", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "9": { + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "10": { + "inputs": { + "lora_name": "sd1.5/hanfu.safetensors", + "strength_model": 0.5, + "strength_clip": 0.5, + "model": [ + "11", + 0 + ], + "clip": [ + "11", + 1 + ] + }, + "class_type": "LoraLoader", + "_meta": { + "title": "Load LoRA" + } + }, + "11": { + "inputs": { + "lora_name": "sd1.5/.blindbox.safetensors", + "strength_model": 0.5, + "strength_clip": 0.5, + "model": [ + "13", + 0 + ], + "clip": [ + "13", + 1 + ] + }, + "class_type": "LoraLoader", + "_meta": { + "title": "Load LoRA" + } + }, + "13": { + "inputs": { + "ckpt_name": "v1-5-pruned-emaonly.ckpt", + "vae_speedup": "disable" + }, + "class_type": "OneDiffCheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint - OneDiff" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_speedup.json b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_speedup.json new file mode 100644 index 000000000..73f635ed3 --- /dev/null +++ 
b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/lora_speedup.json @@ -0,0 +1,127 @@ +{ + "3": { + "inputs": { + "seed": 1, + "steps": 20, + "cfg": 8, + "sampler_name": "euler", + "scheduler": "normal", + "denoise": 1, + "model": [ + "14", + 0 + ], + "positive": [ + "6", + 0 + ], + "negative": [ + "7", + 0 + ], + "latent_image": [ + "5", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "5": { + "inputs": { + "width": 512, + "height": 768, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + "title": "Empty Latent Image" + } + }, + "6": { + "inputs": { + "text": "masterpiece best quality girl, hanfu", + "clip": [ + "14", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "7": { + "inputs": { + "text": "bad hands", + "clip": [ + "14", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "CLIP Text Encode (Prompt)" + } + }, + "8": { + "inputs": { + "samples": [ + "3", + 0 + ], + "vae": [ + "11", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "9": { + "inputs": { + "filename_prefix": "ComfyUI", + "images": [ + "8", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "11": { + "inputs": { + "ckpt_name": "v1-5-pruned-emaonly.ckpt", + "vae_speedup": "disable" + }, + "class_type": "OneDiffCheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint - OneDiff" + } + }, + "14": { + "inputs": { + "lora_name": "sd1.5/hanfu.safetensors", + "strength_model": 0.5, + "strength_clip": 0.5, + "model": [ + "11", + 0 + ], + "clip": [ + "11", + 1 + ] + }, + "class_type": "LoraLoader", + "_meta": { + "title": "Load LoRA" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/sdxl-control-lora-speedup.json b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/sdxl-control-lora-speedup.json new 
file mode 100644 index 000000000..5ca999356 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/resources/workflows/oneflow/sdxl-control-lora-speedup.json @@ -0,0 +1,153 @@ +{ + "1": { + "inputs": { + "seed": 1, + "steps": 1, + "cfg": 8, + "sampler_name": "euler_ancestral", + "scheduler": "normal", + "denoise": 1, + "model": [ + "2", + 0 + ], + "positive": [ + "10", + 0 + ], + "negative": [ + "10", + 1 + ], + "latent_image": [ + "46", + 0 + ] + }, + "class_type": "KSampler", + "_meta": { + "title": "KSampler" + } + }, + "2": { + "inputs": { + "ckpt_name": "sd_xl_base_1.0.safetensors" + }, + "class_type": "CheckpointLoaderSimple", + "_meta": { + "title": "Load Checkpoint" + } + }, + "4": { + "inputs": { + "text": "ironman, 4k", + "clip": [ + "2", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "Positive Prompt" + } + }, + "5": { + "inputs": { + "text": "black and white", + "clip": [ + "2", + 1 + ] + }, + "class_type": "CLIPTextEncode", + "_meta": { + "title": "Negative Prompt" + } + }, + "6": { + "inputs": { + "samples": [ + "1", + 0 + ], + "vae": [ + "2", + 2 + ] + }, + "class_type": "VAEDecode", + "_meta": { + "title": "VAE Decode" + } + }, + "8": { + "inputs": { + "filename_prefix": "comfyui-clora-canny", + "images": [ + "6", + 0 + ] + }, + "class_type": "SaveImage", + "_meta": { + "title": "Save Image" + } + }, + "10": { + "inputs": { + "strength": 1, + "start_percent": 0, + "end_percent": 1, + "positive": [ + "4", + 0 + ], + "negative": [ + "5", + 0 + ], + "control_net": [ + "47", + 0 + ], + "image": [ + "43", + 0 + ] + }, + "class_type": "ControlNetApplyAdvanced", + "_meta": { + "title": "Apply ControlNet (Advanced)" + } + }, + "43": { + "inputs": { + "image": "control/sdxl-unet-control-lora-speedup.png", + "upload": "image" + }, + "class_type": "LoadImage", + "_meta": { + "title": "Load Image" + } + }, + "46": { + "inputs": { + "width": 512, + "height": 512, + "batch_size": 1 + }, + "class_type": "EmptyLatentImage", + "_meta": { + 
"title": "Empty Latent Image" + } + }, + "47": { + "inputs": { + "control_net_name": "control-lora-openposeXL2-rank256.safetensors" + }, + "class_type": "OneDiffControlNetLoader", + "_meta": { + "title": "Load ControlNet Model - OneDiff" + } + } +} \ No newline at end of file diff --git a/onediff_comfy_nodes/benchmarks/scripts/install_env.sh b/onediff_comfy_nodes/benchmarks/scripts/install_env.sh new file mode 100644 index 000000000..fb7946e6e --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/scripts/install_env.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +if [ -z "$1" ]; then + echo "Usage: $0 " + echo "Please provide the directory where ComfyUI should be installed." + exit 1 +fi + +COMFYUI_ROOT=$1 +CUSTOM_NODES=$COMFYUI_ROOT/custom_nodes + +if [ ! -d "$COMFYUI_ROOT" ]; then + echo "Error: Directory $COMFYUI_ROOT does not exist." + exit 1 +fi + +# comfyui_controlnet_aux ComfyUI_InstantID ComfyUI_IPAdapter_plus PuLID_ComfyUI +ln -s /share_nfs/hf_models/comfyui_resources/custom_nodes/* $CUSTOM_NODES/ + +echo "Installing dependencies..." +if [ "$CI" = "1" ]; then + echo "Detected CI environment. Skipping local environment-specific dependencies." +else + echo "Detected local environment. Installing local environment-specific dependencies." + pip install -r $CUSTOM_NODES/ComfyUI_InstantID/requirements.txt + pip install -r $CUSTOM_NODES/PuLID_ComfyUI/requirements.txt +fi + +echo "Installing common dependencies..." +pip install websocket-client==1.8.0 numpy==1.26.4 scikit-image -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install nexfort diff --git a/onediff_comfy_nodes/benchmarks/scripts/run_all_tests.sh b/onediff_comfy_nodes/benchmarks/scripts/run_all_tests.sh new file mode 100644 index 000000000..ca509e70e --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/scripts/run_all_tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -e + +echo "Starting test execution..." 
+ +echo "Running run_oneflow_case_ci.sh" +bash scripts/run_oneflow_case_ci.sh + +echo "Running run_nexfort_case_ci.sh" +bash scripts/run_nexfort_case_ci.sh + +if [ "$CI" = "1" ]; then + echo "Detected CI environment. Skipping run_oneflow_case_local.sh." +else + echo "Running run_oneflow_case_local.sh" + bash scripts/run_oneflow_case_local.sh +fi + +echo "All tests have been executed successfully." diff --git a/onediff_comfy_nodes/benchmarks/scripts/run_nexfort_case_ci.sh b/onediff_comfy_nodes/benchmarks/scripts/run_nexfort_case_ci.sh new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/scripts/run_nexfort_case_ci.sh @@ -0,0 +1 @@ + diff --git a/onediff_comfy_nodes/benchmarks/scripts/run_oneflow_case_ci.sh b/onediff_comfy_nodes/benchmarks/scripts/run_oneflow_case_ci.sh new file mode 100644 index 000000000..29971e623 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/scripts/run_oneflow_case_ci.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +STANDARD_OUTPUT=/share_nfs/hf_models/comfyui_resources/standard_output +COMFY_PORT=8188 +WORKFLOW_DIR=resources/workflows/oneflow + +python3 scripts/text_to_image.py \ + --comfy-port $COMFY_PORT \ + -w $WORKFLOW_DIR/sdxl-control-lora-speedup.json + +# # Baseline +# python3 scripts/text_to_image.py \ +# -w resources/baseline/lora.json resources/baseline/lora_multiple.json \ +# --output-images +python3 scripts/text_to_image.py \ + --comfy-port $COMFY_PORT \ + -w $WORKFLOW_DIR/lora_speedup.json $WORKFLOW_DIR/lora_multiple_speedup.json \ + --baseline-dir $STANDARD_OUTPUT/test_lora_speedup + +# # Baseline +# python3 scripts/text_to_image.py \ +# --comfy-port $COMFY_PORT \ +# -w resources/baseline/ComfyUI_IPAdapter_plus/ipadapter_advanced.json \ +# --output-images +python3 scripts/text_to_image.py \ + --comfy-port $COMFY_PORT \ + -w $WORKFLOW_DIR/ComfyUI_IPAdapter_plus/ipadapter_advanced.json \ + --baseline-dir $STANDARD_OUTPUT/test_ipa +# --output-images \ diff --git 
#!/bin/bash
# Benchmark cases that only run on a local (non-CI) machine.
set -e

COMFY_PORT=8188
STANDARD_OUTPUT=/share_nfs/hf_models/comfyui_resources/standard_output
WORKFLOW_DIR=resources/workflows/oneflow

# InstantID posed speedup. Reference images can be regenerated from
# resources/baseline/ComfyUI_InstantID/instantid_posed.json with
# --output-images.
python3 scripts/text_to_image.py \
    --comfy-port $COMFY_PORT \
    -w $WORKFLOW_DIR/ComfyUI_InstantID/instantid_posed_speedup.json \
    --output-images
import argparse
import os
import sys
import time
from io import BytesIO
from typing import List, Optional, Union

import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity as ssim

sys.path.append("./src")

from core.log_utils import setup_logging
from core.service_client import comfy_client_context
from input_registration import dispatch_generator

DEFAULT_HOST = "127.0.0.1"
DEFAULT_COMFY_PORT = "8188"


def parse_args():
    """Build and parse the CLI arguments for the workflow benchmark runner."""
    parser = argparse.ArgumentParser(description="Test ComfyUI workflow")
    parser.add_argument(
        "-w", "--workflow", type=str, nargs="+", required=True, help="Workflow file(s)"
    )
    parser.add_argument(
        "--listen", type=str, default=DEFAULT_HOST, help="Service listen address"
    )
    parser.add_argument(
        "--comfy-port",
        type=str,
        default=DEFAULT_COMFY_PORT,
        help="ComfyUI service port",
    )
    parser.add_argument(
        "--output-images", action="store_true", help="Enable output of images."
    )
    parser.add_argument(
        "--baseline-dir", type=str, help="Directory for baseline output."
    )
    parser.add_argument(
        "--ssim-threshold",
        type=float,
        default=0.5,
        help="SSIM threshold for image comparison.",
    )
    return parser.parse_args()


def save_image(image_data: bytes, output_dir: str, image_name: str) -> str:
    """Write raw image bytes to ``output_dir/image_name`` and return the path."""
    os.makedirs(output_dir, exist_ok=True)
    image_path = os.path.join(output_dir, image_name)
    Image.open(BytesIO(image_data)).save(image_path)
    return image_path


def calculate_ssim(image1: Image.Image, image2: Image.Image) -> float:
    """Return the structural-similarity index of two same-sized images.

    Both images are converted to RGB before comparison.

    Raises:
        ValueError: if the two images have different dimensions.
    """
    image1_np = np.array(image1.convert("RGB"))
    image2_np = np.array(image2.convert("RGB"))
    if image1_np.shape != image2_np.shape:
        # An explicit raise survives `python -O`, unlike the previous bare assert.
        raise ValueError("Images must have the same dimensions")
    return ssim(image1_np, image2_np, channel_axis=2)


class WorkflowProcessor:
    """Validates, optionally saves, and baseline-compares generated images."""

    def __init__(
        self,
        output_images: bool,
        output_dir: str,
        baseline_dir: Optional[str],
        logger,
        ssim_threshold: float,
    ):
        self.output_images = output_images  # save generated images when True
        self.output_dir = output_dir  # destination for saved images
        self.baseline_dir = baseline_dir  # reference images for the SSIM check
        self.logger = logger
        self.ssim_threshold = ssim_threshold  # minimum acceptable similarity

    def process_image(self, image_data: bytes, index: int) -> None:
        """Sanity-check one generated image and compare it with its baseline.

        Raises:
            ValueError: if the image is blank or too dissimilar to the
                baseline image ``image_{index}.png`` in ``baseline_dir``.
        """
        pil_image = Image.open(BytesIO(image_data))
        self.logger.info(
            f"Image Size - Height: {pil_image.height}px, Width: {pil_image.width}px"
        )
        # `.any()` already yields a bool; the original compared it `!= 0`.
        if not np.array(pil_image).any():
            raise ValueError("Image is blank")

        if self.output_images:
            image_path = save_image(image_data, self.output_dir, f"image_{index}.png")
            self.logger.info(f"Saved image to: {image_path}")

        if self.baseline_dir:
            baseline_image_path = os.path.join(self.baseline_dir, f"image_{index}.png")
            baseline_image = Image.open(baseline_image_path)
            ssim_value = calculate_ssim(pil_image, baseline_image)
            self.logger.info(f"SSIM value with baseline: {ssim_value}")
            if ssim_value <= self.ssim_threshold:
                raise ValueError(
                    f"SSIM {ssim_value:.4f} is not above threshold {self.ssim_threshold}"
                )


def run_workflow(
    workflow: Union[str, List[str]],
    comfy_port: str = DEFAULT_COMFY_PORT,
    output_images: bool = True,
    baseline_dir: Optional[str] = None,
    ssim_threshold: float = 0.5,
) -> None:
    """Execute workflow file(s) against a running ComfyUI service.

    Each input produced by ``dispatch_generator`` is submitted once; a run is
    required to yield exactly one image (batch size 1), which is validated by
    ``WorkflowProcessor``.

    Raises:
        ValueError: if a run produces zero or more than one image, or an
            image fails validation.
    """
    logger, result_dir = setup_logging(exp_name="exp")
    logger.info(f"Result directory: {result_dir}")

    processor = WorkflowProcessor(
        output_images,
        os.path.join(result_dir, "imgs"),
        baseline_dir,
        logger,
        ssim_threshold,
    )

    def _fail(message: str) -> None:
        # Log before raising so the failure is recorded in the experiment log.
        logger.error(message)
        raise ValueError(message)

    with comfy_client_context(port=comfy_port) as client:
        logger.info(f"Testing workflows: {workflow}")
        for i, comfy_graph in enumerate(dispatch_generator(workflow)):
            start_time = time.time()
            images = client.get_images(comfy_graph.graph)
            e2e_time = time.time() - start_time

            if not images:
                _fail("No images generated")
            if len(images) != 1:
                _fail(f"Expected 1 image, but got {len(images)} images. Batch Size == 1")

            for images_output in images.values():
                for image_data in images_output:
                    processor.process_image(image_data, i)

            logger.info(f"Workflow {i} E2E: {e2e_time:.2f} seconds")


if __name__ == "__main__":
    args = parse_args()
    run_workflow(
        args.workflow,
        args.comfy_port,
        args.output_images,
        args.baseline_dir,
        args.ssim_threshold,
    )
def increment_path(path, exist_ok=False, sep="", mkdir=False):
    """Return *path*, or the first non-existing ``{path}{sep}{n}`` variant.

    For files the numeric suffix goes before the extension
    (``runs/log.txt`` -> ``runs/log2.txt``); for directories it is appended
    (``runs/exp`` -> ``runs/exp2``, ``runs/exp3``, ...).  With
    ``exist_ok=True`` the path is returned unchanged even if it exists.
    ``mkdir=True`` creates the resulting directory (parents included).
    """
    candidate = Path(path)  # os-agnostic
    if candidate.exists() and not exist_ok:
        if candidate.is_file():
            stem, suffix = candidate.with_suffix(""), candidate.suffix
        else:
            stem, suffix = candidate, ""

        # Probe {stem}2, {stem}3, ... until a free name is found (bounded scan).
        for n in range(2, 9999):
            numbered = f"{stem}{sep}{n}{suffix}"
            if not Path(numbered).exists():
                break
        candidate = Path(numbered)

    if mkdir:
        candidate.mkdir(parents=True, exist_ok=True)  # make directory

    return candidate
def create_generator_registry():
    """Create a (register, dispatch) pair for workflow-input generators.

    ``register(path_or_paths)`` is a decorator that maps one or more workflow
    file paths to a generator function.  ``dispatch(path_or_paths, ...)``
    returns an iterator over the registered generator's outputs; a list of
    paths yields the concatenation of the per-path iterators, in order.
    """
    # Dictionary to hold registered constructors.
    generator_registry: Dict[str, Callable] = {}

    def register(workflow_path: Union[List[str], str]) -> Callable:
        def decorator(generator_function: Callable) -> Callable:
            # One generator may serve several workflow files.
            # NOTE(fix): isinstance must test against the runtime classes
            # (list, tuple) -- typing.List/Tuple in isinstance is deprecated.
            if isinstance(workflow_path, (list, tuple)):
                for workflow in workflow_path:
                    generator_registry[workflow] = generator_function
            else:
                generator_registry[workflow_path] = generator_function
            return generator_function

        return decorator

    def dispatch(workflow_path: Union[List[str], str], *args, **kwargs):
        """Return an iterator of generated inputs for the given workflow(s).

        Raises:
            ValueError: if no generator is registered for a workflow path.
        """
        if isinstance(workflow_path, (list, tuple)):
            # Eagerly resolve every path so unknown workflows fail up front,
            # then chain the resulting iterators.
            return itertools.chain(
                *[dispatch(w, *args, **kwargs) for w in workflow_path]
            )
        generator = generator_registry.get(workflow_path)
        if generator is None:
            raise ValueError(f"No generator registered for {workflow_path}")
        return generator(workflow_path, *args, **kwargs)

    return register, dispatch
class ComfyGraph:
    """Thin wrapper around a ComfyUI prompt graph (a dict of nodes).

    All setters mutate the wrapped graph in place.  ``sampler_nodes`` holds
    the ids of the KSampler-style nodes whose wired inputs (prompt text,
    latent size, sampler/scheduler settings) the setters rewrite.
    """

    def __init__(
        self, graph: dict, sampler_nodes: list[str],
    ):
        self.graph = graph
        self.sampler_nodes = sampler_nodes

    def set_prompt(self, prompt, negative_prompt=None):
        """Write the positive (and optional negative) prompt text into the
        text nodes feeding every sampler node (e.g. base and refiner)."""
        for sampler in self.sampler_nodes:
            sampler_inputs = self.graph[sampler]["inputs"]
            positive_node = sampler_inputs["positive"][0]
            self.graph[positive_node]["inputs"]["text"] = prompt
            if negative_prompt:
                negative_node = sampler_inputs["negative"][0]
                self.graph[negative_node]["inputs"]["text"] = negative_prompt

    def set_sampler_name(
        self, sampler_name: str,
    ):
        """Set the sampling algorithm on every sampler node."""
        for sampler in self.sampler_nodes:
            self.graph[sampler]["inputs"]["sampler_name"] = sampler_name

    def set_scheduler(self, scheduler: str):
        """Set the noise scheduler on every sampler node."""
        for sampler in self.sampler_nodes:
            self.graph[sampler]["inputs"]["scheduler"] = scheduler

    def set_filename_prefix(self, prefix: str):
        """Point every SaveImage node at the given filename prefix."""
        for node in self.graph.values():
            if node["class_type"] == "SaveImage":
                node["inputs"]["filename_prefix"] = prefix

    def set_image_size(self, height: int, width: int, batch_size: int = 1):
        """Resize the latent-image node wired into each sampler node."""
        for sampler in self.sampler_nodes:
            size_node = self.graph[sampler]["inputs"]["latent_image"][0]
            size_inputs = self.graph[size_node]["inputs"]
            size_inputs["height"] = height
            size_inputs["width"] = width
            size_inputs["batch_size"] = batch_size
1024 * 1024 + poll_interval = 0.5 # sec + while True: + try: + stats = self.get_system_stats() + for device in stats["devices"]: + vram_total = device["vram_total"] + vram_free = device["vram_free"] + vram_used = vram_total - vram_free + + vram_used_gb = vram_used / BYTES_TO_GB + + if vram_used_gb > max_vram_used_gb: + max_vram_used_gb = vram_used_gb + + current_time = time.time() + if current_time - last_print_time >= 2: + logger.info( + f"Current VRAM used: {vram_used_gb:.2f} GB\tMaximum VRAM used: {max_vram_used_gb:.2f} GB" + ) + last_print_time = current_time + + time.sleep(poll_interval) + except Exception as e: + print(f"{e=}") + break + + def close(self): + self.ws.close() + + def queue_prompt(self, prompt): + p = {"prompt": prompt, "client_id": self.client_id} + data = json.dumps(p).encode("utf-8") + req = urllib.request.Request(f"http://{self.server_address}/prompt", data=data) + return json.loads(urllib.request.urlopen(req).read()) + + def get_image(self, filename, subfolder, folder_type): + data = {"filename": filename, "subfolder": subfolder, "type": folder_type} + url_values = urllib.parse.urlencode(data) + with urllib.request.urlopen( + f"http://{self.server_address}/view?{url_values}" + ) as response: + return response.read() + + def get_history(self, prompt_id): + with urllib.request.urlopen( + f"http://{self.server_address}/history/{prompt_id}" + ) as response: + return json.loads(response.read()) + + def get_system_stats(self): + with urllib.request.urlopen( + f"http://{self.server_address}/system_stats" + ) as response: + return json.loads(response.read()) + + def get_images(self, graph, save=True): + prompt = graph.graph if isinstance(graph, ComfyGraph) else graph + + if not save: + prompt_str = json.dumps(prompt) + prompt_str = prompt_str.replace("SaveImage", "PreviewImage") + prompt = json.loads(prompt_str) + + prompt_id = self.queue_prompt(prompt)["prompt_id"] + output_images = {} + while True: + out = self.ws.recv() + if isinstance(out, 
str): + message = json.loads(out) + if message["type"] == "executing": + data = message["data"] + if data["node"] is None and data["prompt_id"] == prompt_id: + break + else: + continue + + history = self.get_history(prompt_id)[prompt_id] + for o in history["outputs"]: + for node_id in history["outputs"]: + node_output = history["outputs"][node_id] + if "images" in node_output: + images_output = [] + for image in node_output["images"]: + image_data = self.get_image( + image["filename"], image["subfolder"], image["type"] + ) + images_output.append(image_data) + output_images[node_id] = images_output + + return output_images diff --git a/onediff_comfy_nodes/benchmarks/src/input_registration.py b/onediff_comfy_nodes/benchmarks/src/input_registration.py new file mode 100644 index 000000000..aa82b79d5 --- /dev/null +++ b/onediff_comfy_nodes/benchmarks/src/input_registration.py @@ -0,0 +1,153 @@ +import json +import os +from typing import NamedTuple +from core.registry import create_generator_registry +from core.service_client import ComfyGraph + +WORKFLOW_DIR = "resources/workflows" +FACE_IMAGE_DIR = "/share_nfs/hf_models/comfyui_resources/input/faces" +POSE_IMAGE_DIR = "/share_nfs/hf_models/comfyui_resources/input/poses" + +class InputParams(NamedTuple): + graph: ComfyGraph + + +def read_prompts(file_path="resources/prompts.txt"): + with open(file_path, "r", encoding="utf-8") as fp: + lines = fp.readlines() + return [line.strip() for line in lines if line.strip()] + + +def get_all_images( + directory=FACE_IMAGE_DIR, + image_extensions=set([".jpg", ".jpeg", ".png", ".gif", ".bmp"]), +): + all_files = [os.path.join(directory, f) for f in os.listdir(directory)] + image_files = [ + f + for f in all_files + if os.path.isfile(f) and os.path.splitext(f)[1].lower() in image_extensions + ] + return image_files + + +# Create register and get functions +register_generator, dispatch_generator = create_generator_registry() + + 
@register_generator(f"{WORKFLOW_DIR}/example_workflow_api.json")
def _(workflow_path, *args, **kwargs):
    """Yield the example workflow once per latent resolution (3x3 grid)."""
    with open(workflow_path, "r") as fp:
        graph = ComfyGraph(graph=json.load(fp), sampler_nodes=["3"])
    sizes = [1024, 768, 512]
    for height in sizes:
        for width in sizes:
            graph.set_image_size(height=height, width=width)
            yield InputParams(graph=graph)


SD3_WORKFLOWS = [
    f"{WORKFLOW_DIR}/baseline/sd3_baseline.json",
    f"{WORKFLOW_DIR}/nexfort/sd3_unet_speedup.json",
    f"{WORKFLOW_DIR}/nexfort/sd3_unet_vae_speedup.json",
]


@register_generator(SD3_WORKFLOWS)
def _(workflow_path, *args, **kwargs):
    """Yield SD3 workflows over the last five prompts at nine resolutions."""
    with open(workflow_path, "r") as fp:
        graph = ComfyGraph(graph=json.load(fp), sampler_nodes=["271"])
    texts = read_prompts()
    sizes = [1024, 768, 512]
    for height in sizes:
        for width in sizes:
            for text in texts[-5:]:
                graph.set_prompt(prompt=text)
                graph.set_image_size(height=height, width=width)
                yield InputParams(graph=graph)


@register_generator(f"{WORKFLOW_DIR}/oneflow/sdxl-control-lora-speedup.json")
def _(workflow_path, *args, **kwargs):
    """Yield the SDXL control-lora workflow unchanged, once."""
    with open(workflow_path, "r") as fp:
        workflow = json.load(fp)
    yield InputParams(graph=ComfyGraph(graph=workflow, sampler_nodes=["1"]))


@register_generator(
    [
        f"{WORKFLOW_DIR}/baseline/ComfyUI_IPAdapter_plus/ipadapter_advanced.json",
        f"{WORKFLOW_DIR}/oneflow/ComfyUI_IPAdapter_plus/ipadapter_advanced.json",
    ]
)
def _(workflow_path, *args, **kwargs):
    """Yield the IPAdapter workflow for each face image at four resolutions."""
    with open(workflow_path, "r") as fp:
        graph = ComfyGraph(graph=json.load(fp), sampler_nodes=["3"])
    # Prompts are constant for every yield; hoisted out of the loops.
    positive_prompt = "in a peaceful spring morning a woman wearing a white shirt is sitting in a park on a bench\n\nhigh quality, detailed, diffuse light"
    negative_prompt = "blurry, noisy, messy, lowres, jpeg, artifacts, ill, distorted, malformed"
    for image in get_all_images():
        graph.graph["12"]["inputs"]["image"] = image  # node 12: image loader
        for height in (768, 512):
            for width in (768, 512):
                graph.set_prompt(positive_prompt, negative_prompt)
                graph.set_image_size(height=height, width=width)
                yield InputParams(graph=graph)


@register_generator(
    [
        f"{WORKFLOW_DIR}/baseline/lora.json",
        f"{WORKFLOW_DIR}/baseline/lora_multiple.json",
        f"{WORKFLOW_DIR}/oneflow/lora_speedup.json",
        f"{WORKFLOW_DIR}/oneflow/lora_multiple_speedup.json",
    ]
)
def _(workflow_path, *args, **kwargs):
    """Yield the LoRA workflow once per SD1.5 checkpoint."""
    with open(workflow_path, "r") as fp:
        graph = ComfyGraph(graph=json.load(fp), sampler_nodes=["3"])
    graph.set_prompt("masterpiece best quality girl, hanfu", "bad hands")
    # Exactly one checkpoint-loader node is expected in these workflows.
    loader_nodes = [
        node
        for node in graph.graph.values()
        if node["class_type"]
        in ("CheckpointLoaderSimple", "OneDiffCheckpointLoaderSimple")
    ]
    assert len(loader_nodes) == 1
    for file_name in (
        "020.realisticVisionV51_v51VAE.safetensors",
        "v1-5-pruned-emaonly.ckpt",
    ):
        loader_nodes[0]["inputs"]["ckpt_name"] = os.path.join("sd15/", file_name)
        yield InputParams(graph=graph)


@register_generator(
    [
        f"{WORKFLOW_DIR}/baseline/ComfyUI_InstantID/instantid_posed.json",
        f"{WORKFLOW_DIR}/oneflow/ComfyUI_InstantID/instantid_posed_speedup.json",
    ]
)
def _(workflow_path, *args, **kwargs):
    """Yield the InstantID workflow for every (face, pose) image pair."""
    with open(workflow_path, "r") as fp:
        graph = ComfyGraph(graph=json.load(fp), sampler_nodes=["3"])
    face_imgs = get_all_images(FACE_IMAGE_DIR)
    pose_imgs = get_all_images(POSE_IMAGE_DIR)
    for face_img in face_imgs:
        for pose_img in pose_imgs:
            graph.graph["13"]["inputs"]["image"] = face_img  # face reference
            graph.graph["67"]["inputs"]["image"] = pose_img  # pose reference
            yield InputParams(graph=graph)
a/onediff_comfy_nodes/extras_nodes/nodes_nexfort_booster.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_nexfort_booster.py @@ -5,10 +5,10 @@ # https://github.com/siliconflow/nexfort?tab=readme-ov-file#suggested-combinations-of-compiler-modes compiler_modes = collections.OrderedDict( { + "max-optimize:max-autotune:low-precision": "This will deliver a good performance and adapt quickly to shape changes.", + "max-optimize:max-autotune:low-precision:freezing:benchmark": "", "jit:disable-runtime-fusion:low-precision": "This compiles super quickly, but the performance might not be optimized very noticeably.", "jit:benchmark:low-precision:freezing:cudagraphs": "This compiles the model very quickly, but the performance might be not as good as `TorchInductor` optimized models.", - "max-optimize:max-autotune:low-precision": "This will deliver a good performance and adapt quickly to shape changes.", - # "max-optimize:max-autotune:low-precision": "", "max-autotune:benchmark:low-precision:cudagraphs": "This is the most suggested combination of compiler modes. It will deliver a good balance between performance and compilation time.", "max-optimize:max-autotune:benchmark:low-precision:freezing:cudagraphs": "This is the most aggressive combination of compiler modes. 
It will deliver the best performance but might slow down the compilation significantly.", } diff --git a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py index e1166e9cf..b4a1b3297 100644 --- a/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py +++ b/onediff_comfy_nodes/extras_nodes/nodes_oneflow_booster.py @@ -20,15 +20,6 @@ PatchBoosterExecutor, ) from ..modules.oneflow.config import ONEDIFF_QUANTIZED_OPTIMIZED_MODELS -from ..modules.oneflow.hijack_animatediff import animatediff_hijacker -from ..modules.oneflow.hijack_comfyui_instantid import comfyui_instantid_hijacker -from ..modules.oneflow.hijack_ipadapter_plus import ipadapter_plus_hijacker -from ..modules.oneflow.hijack_model_management import model_management_hijacker -from ..modules.oneflow.hijack_model_patcher import model_patch_hijacker -from ..modules.oneflow.hijack_nodes import nodes_hijacker -from ..modules.oneflow.hijack_samplers import samplers_hijack -from ..modules.oneflow.hijack_utils import comfy_utils_hijack - from ..modules.oneflow.utils import OUTPUT_FOLDER, load_graph, save_graph from ..modules import BoosterScheduler @@ -37,14 +28,6 @@ OnelineQuantizationBoosterExecutor, ) # type: ignore -model_management_hijacker.hijack() # add flow.cuda.empty_cache() -nodes_hijacker.hijack() -samplers_hijack.hijack() -animatediff_hijacker.hijack() -ipadapter_plus_hijacker.hijack() -comfyui_instantid_hijacker.hijack() -model_patch_hijacker.hijack() -comfy_utils_hijack.hijack() import comfy_extras.nodes_video_model from nodes import CheckpointLoaderSimple @@ -175,6 +158,7 @@ def deep_cache_convert( start_step, end_step, ): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') booster = BoosterScheduler( DeepcacheBoosterExecutor( cache_interval=cache_interval, @@ -325,6 +309,7 @@ def onediff_load_checkpoint( start_step=0, end_step=1000, ): + print(f'Warning: {type(self).__name__} will be deleted. 
Please use it with caution.') # CheckpointLoaderSimple.load_checkpoint modelpatcher, clip, vae = self.load_checkpoint(ckpt_name) booster = BoosterScheduler( @@ -396,6 +381,7 @@ def speedup( cache_name="svd", custom_booster: BoosterScheduler = None, ): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') if custom_booster: booster = custom_booster booster.inplace = inplace @@ -425,6 +411,7 @@ def INPUT_TYPES(s): CATEGORY = "OneDiff" def load_graph(self, vae, graph): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') vae_model = vae.first_stage_model device = model_management.vae_offload_device() load_graph(vae_model, graph, device, subfolder="vae") @@ -448,6 +435,7 @@ def INPUT_TYPES(s): OUTPUT_NODE = True def save_graph(self, images, vae, filename_prefix): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') vae_model = vae.first_stage_model vae_device = model_management.vae_offload_device() save_graph(vae_model, filename_prefix, vae_device, subfolder="vae") @@ -473,6 +461,7 @@ def INPUT_TYPES(s): CATEGORY = "OneDiff" def load_graph(self, model, graph): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') diffusion_model = model.model.diffusion_model @@ -497,6 +486,7 @@ def INPUT_TYPES(s): OUTPUT_NODE = True def save_graph(self, samples, model, filename_prefix): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') diffusion_model = model.model.diffusion_model save_graph(diffusion_model, filename_prefix, "cuda", subfolder="unet") return {} @@ -550,6 +540,7 @@ def INPUT_TYPES(cls): CATEGORY = "OneDiff" def load_unet_int8(self, model_path): + print(f'Warning: {type(self).__name__} will be deleted. 
Please use it with caution.') from ..modules.oneflow.utils.onediff_quant_utils import ( replace_module_with_quantizable_module, ) @@ -588,6 +579,7 @@ def INPUT_TYPES(s): OUTPUT_NODE = True def quantize_model(self, model, output_dir, conv, linear): + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') from ..modules.oneflow.utils import quantize_and_save_model diffusion_model = model.model.diffusion_model @@ -616,8 +608,10 @@ def INPUT_TYPES(s): CATEGORY = "OneDiff/Loaders" FUNCTION = "onediff_load_checkpoint" + def onediff_load_checkpoint(self, ckpt_name, vae_speedup): modelpatcher, clip, vae = self.load_checkpoint(ckpt_name) + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') booster = BoosterScheduler( OnelineQuantizationBoosterExecutor( conv_percentage=100, @@ -668,6 +662,7 @@ def onediff_load_checkpoint( self, ckpt_name, model_path, compile, vae_speedup, ): need_compile = compile == "enable" + print(f'Warning: {type(self).__name__} will be deleted. Please use it with caution.') modelpatcher, clip, vae = self.load_checkpoint(ckpt_name) # TODO fix by op.compile @@ -720,6 +715,7 @@ def onediff_load_checkpoint( output_vae=True, output_clip=True, ): + print(f'Warning: {type(self).__name__} will be deleted. 
Please use it with caution.') modelpatcher, clip, vae = self.load_checkpoint( ckpt_name, output_vae, output_clip ) diff --git a/onediff_comfy_nodes/modules/booster_cache.py b/onediff_comfy_nodes/modules/booster_cache.py index 392dc5f41..6d2f307dc 100644 --- a/onediff_comfy_nodes/modules/booster_cache.py +++ b/onediff_comfy_nodes/modules/booster_cache.py @@ -2,24 +2,66 @@ import traceback from collections import OrderedDict from comfy.model_patcher import ModelPatcher +from functools import singledispatch from comfy.sd import VAE from onediff.torch_utils.module_operations import get_sub_module from onediff.utils.import_utils import is_oneflow_available +from .._config import is_disable_oneflow_backend -if is_oneflow_available(): - from .oneflow.utils.booster_utils import is_using_oneflow_backend +@singledispatch +def switch_to_cached_model(new_model, cached_model): + raise NotImplementedError(type(new_model)) -def switch_to_cached_model(new_model: ModelPatcher, cache_model): - assert type(new_model.model) == type(cache_model) + +@switch_to_cached_model.register +def _(new_model: ModelPatcher, cached_model): + assert type(new_model.model) == type( + cached_model + ), f"Model type mismatch: expected {type(cached_model)}, got {type(new_model.model)}" + for k, v in new_model.model.state_dict().items(): + cached_v: torch.Tensor = get_sub_module(cached_model, k) + assert v.dtype == cached_v.dtype + cached_v.copy_(v) + new_model.model = cached_model + return new_model + + +@switch_to_cached_model.register +def _(new_model: VAE, cached_model): + assert type(new_model.first_stage_model) == type(cached_model) for k, v in new_model.model.state_dict().items(): - cached_v: torch.Tensor = get_sub_module(cache_model, k) + cached_v: torch.Tensor = get_sub_module(cached_model, k) assert v.dtype == cached_v.dtype cached_v.copy_(v) - new_model.model = cache_model + new_model.first_stage_model = cached_model return new_model +@singledispatch +def get_cached_model(model): + return None 
+ # raise NotImplementedError(type(model)) + + +@get_cached_model.register +def _(model: ModelPatcher): + if is_oneflow_available() and not is_disable_oneflow_backend(): + from .oneflow.utils.booster_utils import is_using_oneflow_backend + + if is_using_oneflow_backend(model): + return None + + return model.model + + +@get_cached_model.register +def _(model: VAE): + # TODO(TEST) if support cache + return None + # return model.first_stage_model + + class BoosterCacheService: _cache = OrderedDict() @@ -27,9 +69,9 @@ def put(self, key, model): if key is None: return # oneflow backends output image error - if is_oneflow_available() and is_using_oneflow_backend(model): - return - self._cache[key] = model.model + cached_model = get_cached_model(model) + if cached_model: + self._cache[key] = cached_model def get(self, key, default=None): return self._cache.get(key, default) diff --git a/onediff_comfy_nodes/modules/nexfort/booster_basic.py b/onediff_comfy_nodes/modules/nexfort/booster_basic.py index 35dab257a..687aaedbd 100644 --- a/onediff_comfy_nodes/modules/nexfort/booster_basic.py +++ b/onediff_comfy_nodes/modules/nexfort/booster_basic.py @@ -16,7 +16,7 @@ class BasicNexFortBoosterExecutor(BoosterExecutor): # https://pytorch.org/docs/stable/_modules/torch.html#compile def __init__( self, - mode: str = "max-optimize:max-autotune:freezing:benchmark:cudagraphs", + mode: str = "max-optimize:max-autotune:low-precision", fullgraph=False, dynamic=None, ): @@ -25,9 +25,9 @@ def __init__( "mode": mode, "dynamic": dynamic, "fullgraph": fullgraph, - } # "memory_format": "channels_last" - + } self.compile_fn = partial(compile, backend="nexfort", options=options) + self.options = options @singledispatchmethod def execute(self, model, ckpt_name=None, **kwargs): @@ -47,6 +47,11 @@ def _(self, model, ckpt_name: Optional[str] = None, **kwargs): @execute.register(VAE) @torch.inference_mode() def _(self, model, ckpt_name: Optional[str] = None, **kwargs): + model.first_stage_model = 
apply_memory_format( + model.first_stage_model, torch.channels_last + ) + print(f"{type(model)} apply compiled config: {self.options}") + # https://huggingface.co/blog/sd3#performance-optimizations-for-sd3 model.first_stage_model.decode = self.compile_fn(model.first_stage_model.decode) return model diff --git a/onediff_comfy_nodes/modules/oneflow/__init__.py b/onediff_comfy_nodes/modules/oneflow/__init__.py index 11f11800c..019765f7f 100644 --- a/onediff_comfy_nodes/modules/oneflow/__init__.py +++ b/onediff_comfy_nodes/modules/oneflow/__init__.py @@ -3,3 +3,23 @@ from .booster_deepcache import DeepcacheBoosterExecutor from .booster_patch import PatchBoosterExecutor from .patch_management.patch_for_oneflow import * + +from .hijack_animatediff import animatediff_hijacker +from .hijack_comfyui_instantid import comfyui_instantid_hijacker +from .hijack_ipadapter_plus import ipadapter_plus_hijacker +from .hijack_model_management import model_management_hijacker +from .hijack_model_patcher import model_patch_hijacker +from .hijack_nodes import nodes_hijacker +from .hijack_samplers import samplers_hijack +from .hijack_utils import comfy_utils_hijack +from .hijack_pulid_comfyui import pulid_comfyui_hijacker + +model_management_hijacker.hijack() # add flow.cuda.empty_cache() +nodes_hijacker.hijack() +samplers_hijack.hijack() +animatediff_hijacker.hijack() +ipadapter_plus_hijacker.hijack() +comfyui_instantid_hijacker.hijack() +model_patch_hijacker.hijack() +comfy_utils_hijack.hijack() +pulid_comfyui_hijacker.hijack() \ No newline at end of file diff --git a/onediff_comfy_nodes/modules/oneflow/booster_basic.py b/onediff_comfy_nodes/modules/oneflow/booster_basic.py index cf7773993..9db31327b 100644 --- a/onediff_comfy_nodes/modules/oneflow/booster_basic.py +++ b/onediff_comfy_nodes/modules/oneflow/booster_basic.py @@ -7,7 +7,9 @@ from comfy.model_patcher import ModelPatcher from comfy.sd import VAE from onediff.infer_compiler import oneflow_compile -from 
onediff.infer_compiler.backends.oneflow import OneflowDeployableModule as DeployableModule +from onediff.infer_compiler.backends.oneflow import ( + OneflowDeployableModule as DeployableModule, +) from ..booster_interface import BoosterExecutor from .onediff_controlnet import OneDiffControlLora @@ -41,11 +43,9 @@ def _(self, model: ModelPatcher, ckpt_name: Optional[str] = None, **kwargs): compiled_model = oneflow_compile(torch_model) model.model.diffusion_model = compiled_model - if ckpt_name: - graph_file = generate_graph_path( - f"{ckpt_name}_{type(model.model).__name__}", torch_model - ) - set_compiled_options(compiled_model, graph_file) + + graph_file = generate_graph_path(f"{type(model).__name__}", model=model.model) + set_compiled_options(compiled_model, graph_file) model.weight_inplace_update = True return model @@ -65,9 +65,9 @@ def _(self, model, ckpt_name: Optional[str] = None, **kwargs) -> ControlNet: compiled_model = oneflow_compile(torch_model) model.control_model = compiled_model - if ckpt_name: - graph_file = generate_graph_path(ckpt_name, torch_model) - set_compiled_options(compiled_model, graph_file) + + graph_file = generate_graph_path(ckpt_name, torch_model) + set_compiled_options(compiled_model, graph_file) return model @execute.register(VAE) @@ -89,22 +89,20 @@ def _(self, model, ckpt_name: Optional[str] = None, **kwargs) -> VAE: compiled_model = oneflow_compile(torch_model) model.first_stage_model = compiled_model - if ckpt_name: - graph_file = generate_graph_path(ckpt_name, torch_model) - set_compiled_options(compiled_model, graph_file) + + graph_file = generate_graph_path(ckpt_name, torch_model) + set_compiled_options(compiled_model, graph_file) return model @execute.register(ControlLora) def _(self, model, ckpt_name: Optional[str] = None, **kwargs): def gen_compile_options(model): - if ckpt_name: - graph_file = generate_graph_path(ckpt_name, model) - return { - "graph_file": graph_file, - "graph_file_device": 
model_management.get_torch_device(), - } - else: - return {} + + graph_file = generate_graph_path(ckpt_name, model) + return { + "graph_file": graph_file, + "graph_file_device": model_management.get_torch_device(), + } controlnet = OneDiffControlLora.from_controllora( model, gen_compile_options=gen_compile_options diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py index 8c2f6bc87..2fe74d63f 100644 --- a/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py +++ b/onediff_comfy_nodes/modules/oneflow/hijack_ipadapter_plus/set_model_patch_replace.py @@ -6,8 +6,10 @@ from ..utils.booster_utils import clear_deployable_module_cache_and_unbind from ..patch_management import PatchType, create_patch_executor - def set_model_patch_replace_v2(org_fn, model, patch_kwargs, key): + apply_patch(org_fn, model, patch_kwargs, key, ipadapter_attention) + +def apply_patch(org_fn, model, patch_kwargs, key, attention_func=None)->None: diff_model = model.model.diffusion_model cache_patch_executor = create_patch_executor(PatchType.CachedCrossAttentionPatch) unet_extra_options_patch_executor = create_patch_executor( @@ -72,7 +74,7 @@ def split_patch_kwargs(patch_kwargs): if key not in to["patches_replace"]["attn2"]: if key not in cache_dict: - attn2_m_pt = Attn2Replace(ipadapter_attention, **patch_kwargs) + attn2_m_pt = Attn2Replace(attention_func, **patch_kwargs) attn2_m_of = torch2oflow(attn2_m_pt, bypass_check=True) cache_dict[key] = attn2_m_of diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/__init__.py b/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/__init__.py new file mode 100644 index 000000000..66ee9c9b6 --- /dev/null +++ b/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/__init__.py @@ -0,0 +1,4 @@ +from ._config import pulid_comfyui_hijacker, is_load_pulid_comfyui_pkg + +if 
is_load_pulid_comfyui_pkg: + from .pulid import * diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/_config.py b/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/_config.py new file mode 100644 index 000000000..9f1509356 --- /dev/null +++ b/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/_config.py @@ -0,0 +1,25 @@ +import os +import traceback + +COMFYUI_ROOT = os.getenv("COMFYUI_ROOT") +from onediff.infer_compiler.backends.oneflow.import_tools import DynamicModuleLoader +from onediff.infer_compiler.backends.oneflow.transform import transform_mgr + +from ...sd_hijack_utils import Hijacker + +__all__ = ["pulid_comfyui_pt", "pulid_comfyui_of"] + +pkg_name = "PuLID_ComfyUI" +pkg_root = os.path.join(COMFYUI_ROOT, "custom_nodes", pkg_name) +is_load_pulid_comfyui_pkg = True +try: + if os.path.exists(pkg_root): + pulid_comfyui_pt = DynamicModuleLoader.from_path(pkg_root) + pulid_comfyui_of = transform_mgr.transform_package(pkg_name) + else: + is_load_pulid_comfyui_pkg = False +except Exception as e: + print(traceback.format_exc()) + print(f"Warning: Failed to load {pkg_root} due to {e}") + is_load_pulid_comfyui_pkg = False +pulid_comfyui_hijacker = Hijacker() diff --git a/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/pulid.py b/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/pulid.py new file mode 100644 index 000000000..9fbcf0586 --- /dev/null +++ b/onediff_comfy_nodes/modules/oneflow/hijack_pulid_comfyui/pulid.py @@ -0,0 +1,26 @@ +from ..utils.booster_utils import is_using_oneflow_backend +from ._config import pulid_comfyui_pt, pulid_comfyui_hijacker +from ..hijack_ipadapter_plus.set_model_patch_replace import apply_patch +from register_comfy.CrossAttentionPatch import pulid_attention + +set_model_patch_replace_pt = pulid_comfyui_pt.pulid.set_model_patch_replace + + + +def set_model_patch_replace_of(org_fn, model, patch_kwargs, key): + apply_patch( + org_fn, + model=model, + patch_kwargs=patch_kwargs, + key=key, + 
attention_func=pulid_attention, + ) + + +def cond_func(org_fn, model, *args, **kwargs): + return is_using_oneflow_backend(model) + + +pulid_comfyui_hijacker.register( + set_model_patch_replace_pt, set_model_patch_replace_of, cond_func +) diff --git a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/CrossAttentionPatch.py b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/CrossAttentionPatch.py index 42f0f8c53..7b750f942 100644 --- a/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/CrossAttentionPatch.py +++ b/onediff_comfy_nodes/modules/oneflow/infer_compiler_registry/register_comfy/CrossAttentionPatch.py @@ -225,4 +225,75 @@ def ipadapter_attention(out, q, k, v, extra_options, module_key='', ipadapter=No def is_crossAttention_patch(module) -> bool: - return getattr(module, "_use_crossAttention_patch", False) \ No newline at end of file + return getattr(module, "_use_crossAttention_patch", False) + + +def pulid_attention(out, q, k, v, extra_options, module_key='', pulid=None, cond=None, uncond=None, weight=1.0, ortho=False, ortho_v2=False, mask=None, optimized_attention=None, **kwargs): + k_key = module_key + "_to_k_ip" + v_key = module_key + "_to_v_ip" + + dtype = q.dtype + seq_len = q.shape[1] + cond_or_uncond = extra_options["cond_or_uncond"] + b = q.shape[0] + batch_prompt = b // len(cond_or_uncond) + _, _, oh, ow = extra_options["original_shape"] + + #conds = torch.cat([uncond.repeat(batch_prompt, 1, 1), cond.repeat(batch_prompt, 1, 1)], dim=0) + #zero_tensor = torch.zeros((conds.size(0), num_zero, conds.size(-1)), dtype=conds.dtype, device=conds.device) + #conds = torch.cat([conds, zero_tensor], dim=1) + #ip_k = pulid.ip_layers.to_kvs[k_key](conds) + #ip_v = pulid.ip_layers.to_kvs[v_key](conds) + + k_cond = pulid.ip_layers.to_kvs[k_key](cond).repeat(batch_prompt, 1, 1) + k_uncond = pulid.ip_layers.to_kvs[k_key](uncond).repeat(batch_prompt, 1, 1) + v_cond = 
pulid.ip_layers.to_kvs[v_key](cond).repeat(batch_prompt, 1, 1) + v_uncond = pulid.ip_layers.to_kvs[v_key](uncond).repeat(batch_prompt, 1, 1) + ip_k = torch.cat([(k_cond, k_uncond)[i] for i in cond_or_uncond], dim=0) + ip_v = torch.cat([(v_cond, v_uncond)[i] for i in cond_or_uncond], dim=0) + + out_ip = optimized_attention(q, ip_k, ip_v, extra_options["n_heads"]) + + if ortho: + out = out.to(dtype=torch.float32) + out_ip = out_ip.to(dtype=torch.float32) + projection = (torch.sum((out * out_ip), dim=-2, keepdim=True) / torch.sum((out * out), dim=-2, keepdim=True) * out) + orthogonal = out_ip - projection + out_ip = weight * orthogonal + elif ortho_v2: + out = out.to(dtype=torch.float32) + out_ip = out_ip.to(dtype=torch.float32) + attn_map = q @ ip_k.transpose(-2, -1) + attn_mean = attn_map.softmax(dim=-1).mean(dim=1, keepdim=True) + attn_mean = attn_mean[:, :, :5].sum(dim=-1, keepdim=True) + projection = (torch.sum((out * out_ip), dim=-2, keepdim=True) / torch.sum((out * out), dim=-2, keepdim=True) * out) + orthogonal = out_ip + (attn_mean - 1) * projection + out_ip = weight * orthogonal + else: + out_ip = out_ip * weight + + if mask is not None: + mask_h = oh / math.sqrt(oh * ow / seq_len) + mask_h = int(mask_h) + int((seq_len % int(mask_h)) != 0) + mask_w = seq_len // mask_h + + mask = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bilinear").squeeze(1) + mask = tensor_to_size(mask, batch_prompt) + + mask = mask.repeat(len(cond_or_uncond), 1, 1) + mask = mask.view(mask.shape[0], -1, 1).repeat(1, 1, out.shape[2]) + + # covers cases where extreme aspect ratios can cause the mask to have a wrong size + mask_len = mask_h * mask_w + if mask_len < seq_len: + pad_len = seq_len - mask_len + pad1 = pad_len // 2 + pad2 = pad_len - pad1 + mask = F.pad(mask, (0, 0, pad1, pad2), value=0.0) + elif mask_len > seq_len: + crop_start = (mask_len - seq_len) // 2 + mask = mask[:, crop_start:crop_start+seq_len, :] + + out_ip = out_ip * mask + + return 
out_ip.to(dtype=dtype) diff --git a/onediff_comfy_nodes/modules/oneflow/utils/graph_path.py b/onediff_comfy_nodes/modules/oneflow/utils/graph_path.py index bc5ef1ca9..d3ba15487 100644 --- a/onediff_comfy_nodes/modules/oneflow/utils/graph_path.py +++ b/onediff_comfy_nodes/modules/oneflow/utils/graph_path.py @@ -4,6 +4,7 @@ # ComfyUI from folder_paths import get_input_directory + # onediff from onediff import __version__ as onediff_version from oneflow import __version__ as oneflow_version @@ -19,10 +20,8 @@ def generate_graph_path(ckpt_name, model) -> Path: input_dir = os.getenv("COMFYUI_ONEDIFF_SAVE_GRAPH_DIR", default_dir) input_dir = Path(input_dir) - graph_dir = input_dir / "graphs" / ckpt_name - - key = generate_short_sha256(f"{oneflow_version}{onediff_version}") + graph_dir = input_dir / "graphs" - file_name = f"{type(model).__name__}_{key}" + file_name = f"{type(model).__name__}" return graph_dir / file_name diff --git a/onediff_comfy_nodes/modules/torch_compile/booster_basic.py b/onediff_comfy_nodes/modules/torch_compile/booster_basic.py index b02bb8101..a70b9d52a 100644 --- a/onediff_comfy_nodes/modules/torch_compile/booster_basic.py +++ b/onediff_comfy_nodes/modules/torch_compile/booster_basic.py @@ -45,12 +45,15 @@ def execute(self, model, ckpt_name=None, **kwargs): @execute.register(ModelPatcher) def _(self, model, ckpt_name: Optional[str] = None, **kwargs): + model.model.diffusion_model.to(memory_format=torch.channels_last) model.model.diffusion_model = self.compile_fn(model.model.diffusion_model) return model @execute.register(VAE) def _(self, model, ckpt_name: Optional[str] = None, **kwargs): - model.first_stage_model = self.compile_fn(model.first_stage_model) + # https://huggingface.co/blog/sd3#performance-optimizations-for-sd3 + model.first_stage_model.to(memory_format=torch.channels_last) + model.first_stage_model.decode = self.compile_fn(model.first_stage_model.decode) return model @execute.register(ControlNet) diff --git 
a/src/onediff/infer_compiler/backends/nexfort/nexfort.py b/src/onediff/infer_compiler/backends/nexfort/nexfort.py index c31fa75c0..48c8e6b1e 100644 --- a/src/onediff/infer_compiler/backends/nexfort/nexfort.py +++ b/src/onediff/infer_compiler/backends/nexfort/nexfort.py @@ -3,7 +3,7 @@ import torch from ..registry import register_backend -from .deployable_module import get_deployable_module +from .deployable_module import get_deployable_module, NexfortDeployableModule @register_backend("nexfort") @@ -20,6 +20,9 @@ def fn(torch_module: Callable): return fn + if isinstance(torch_module, NexfortDeployableModule): + return compile(torch_module._torch_module, options=options) + if isinstance(options, str): import json diff --git a/src/onediff/infer_compiler/backends/oneflow/dual_module.py b/src/onediff/infer_compiler/backends/oneflow/dual_module.py index 634af9494..df4696447 100644 --- a/src/onediff/infer_compiler/backends/oneflow/dual_module.py +++ b/src/onediff/infer_compiler/backends/oneflow/dual_module.py @@ -100,6 +100,7 @@ def __setattr__(self, name: str, value: Any) -> None: if name in ["_torch_module", "_oneflow_module"]: super().__setattr__(name, value) else: # TODO: aviod memory up when set attr + module = self._torch_module if ( hasattr(module, "_disable_param_update") @@ -108,6 +109,7 @@ def __setattr__(self, name: str, value: Any) -> None: return torch_obj = getattr(module, name) + if hasattr(torch_obj, 'copy_'): torch_obj.copy_(value) else: diff --git a/tests/comfy-docker-compose.yml b/tests/comfy-docker-compose.yml index 99f4d7f4d..9731618bd 100644 --- a/tests/comfy-docker-compose.yml +++ b/tests/comfy-docker-compose.yml @@ -26,6 +26,8 @@ services: HF_HUB_OFFLINE: "1" ONEFLOW_MLIR_ENABLE_TIMING: "1" ONEFLOW_MLIR_PRINT_STATS: "1" + ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION: "0" + COMFYUI_ONEDIFF_SAVE_GRAPH_DIR: "/share_nfs/hf_models/comfyui_resources/input" CI: "1" SILICON_ONEDIFF_LICENSE_KEY: ${SILICON_ONEDIFF_LICENSE_KEY} volumes: diff --git 
a/tests/comfyui/extra_model_paths.yaml b/tests/comfyui/extra_model_paths.yaml index 007a395e9..50dab46dc 100644 --- a/tests/comfyui/extra_model_paths.yaml +++ b/tests/comfyui/extra_model_paths.yaml @@ -8,15 +8,18 @@ comfyui: base_path: /app/ComfyUI # checkpoints: /home/fengwen/workspace/test_checkpoints checkpoints: /share_nfs/hf_models/comfyui_resources/checkpoints - clip: models/clip/ + unet: /share_nfs/hf_models/comfyui_resources/unet + clip: /share_nfs/hf_models/comfyui_resources/clip clip_vision: /share_nfs/hf_models/comfyui_resources/clip_vision configs: models/configs/ - controlnet: models/controlnet/ + controlnet: /share_nfs/hf_models/comfyui_resources/controlnet embeddings: models/embeddings/ - loras: /share_nfs/hf_models/comfyui_resources/loras/ + loras: /share_nfs/hf_models/comfyui_resources/loras upscale_models: models/upscale_models/ vae: models/vae/ ipadapter: /share_nfs/hf_models/comfyui_resources/ipadapter + pulid: /share_nfs/hf_models/comfyui_resources/pulid + insightfac: /share_nfs/hf_models/comfyui_resources/insightfac #other_ui: # base_path: path/to/ui