Skip to content

Commit 0e24dbb

Browse files
Adjustments to Z Image. (#10893)
1 parent e9aae31 commit 0e24dbb

File tree

1 file changed

+21
-2
lines changed

1 file changed

+21
-2
lines changed

comfy/supported_models.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import comfy.text_encoders.omnigen2
2222
import comfy.text_encoders.qwen_image
2323
import comfy.text_encoders.hunyuan_image
24+
import comfy.text_encoders.z_image
2425

2526
from . import supported_models_base
2627
from . import latent_formats
@@ -994,7 +995,7 @@ class Lumina2(supported_models_base.BASE):
994995
"shift": 6.0,
995996
}
996997

997-
memory_usage_factor = 1.2
998+
memory_usage_factor = 1.4
998999

9991000
unet_extra_config = {}
10001001
latent_format = latent_formats.Flux
@@ -1013,6 +1014,24 @@ def clip_target(self, state_dict={}):
10131014
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}gemma2_2b.transformer.".format(pref))
10141015
return supported_models_base.ClipTarget(comfy.text_encoders.lumina2.LuminaTokenizer, comfy.text_encoders.lumina2.te(**hunyuan_detect))
10151016

1017+
class ZImage(Lumina2):
    """Z Image: a Lumina2-derived image model using a Qwen3-4B text encoder.

    Inherits everything from Lumina2 except the detection config, sampling
    settings, memory factor, and the text-encoder target below.
    """

    # Matched against checkpoint metadata to identify this model variant.
    unet_config = {
        "image_model": "lumina2",
        "dim": 3840,
    }

    sampling_settings = {
        "multiplier": 1.0,
        "shift": 3.0,
    }

    # NOTE(review): larger than Lumina2's factor — presumably tuned for the
    # 3840-dim transformer; confirm against profiling.
    memory_usage_factor = 1.7

    def clip_target(self, state_dict={}):
        """Return the tokenizer/text-encoder pair for Z Image.

        Probes *state_dict* under the qwen3_4b prefix to detect
        checkpoint-specific encoder options (e.g. dtype).
        """
        key_prefix = "{}qwen3_4b.transformer.".format(self.text_encoder_key_prefix[0])
        detected_options = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, key_prefix)
        tokenizer_cls = comfy.text_encoders.z_image.ZImageTokenizer
        return supported_models_base.ClipTarget(tokenizer_cls, comfy.text_encoders.z_image.te(**detected_options))
1034+
10161035
class WAN21_T2V(supported_models_base.BASE):
10171036
unet_config = {
10181037
"image_model": "wan2.1",
@@ -1453,7 +1472,7 @@ def clip_target(self, state_dict={}):
14531472
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_7b.transformer.".format(pref))
14541473
return supported_models_base.ClipTarget(comfy.text_encoders.hunyuan_video.HunyuanVideo15Tokenizer, comfy.text_encoders.hunyuan_image.te(**hunyuan_detect))
14551474

1456-
models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2]
1475+
models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2]
14571476

14581477

14591478
models += [SVD_img2vid]

0 commit comments

Comments
 (0)