@@ -808,6 +808,10 @@ class StableDiffusionGGML {
808808 // TODO (Pix2Pix): separate image guidance params (right now it's reusing distilled guidance)
809809
810810 float img_cfg_scale = guidance;
811+ if (img_cfg_scale != cfg_scale && sd_version_use_concat (version)) {
812+ LOG_WARN (" 2-conditioning CFG is not supported with this model, disabling it..." );
813+ img_cfg_scale = cfg_scale;
814+ }
811815
812816 LOG_DEBUG (" Sample" );
813817 struct ggml_init_params params;
@@ -830,9 +834,8 @@ class StableDiffusionGGML {
830834
831835 struct ggml_tensor * noised_input = ggml_dup_tensor (work_ctx, noise);
832836
833- bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
834- bool has_img_guidance = version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != img_cfg_scale;
835- has_unconditioned = has_unconditioned || has_img_guidance;
837+ bool has_unconditioned = img_cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
838+ bool has_img_guidance = cfg_scale != img_cfg_scale && uncond.c_crossattn != NULL ;
836839 bool has_skiplayer = slg_scale != 0.0 && skip_layers.size () > 0 ;
837840
838841 // denoise wrapper
@@ -989,9 +992,13 @@ class StableDiffusionGGML {
989992 if (has_img_guidance) {
990993 latent_result = negative_data[i] + img_cfg_scale * (img_cond_data[i] - negative_data[i]) + cfg_scale * (positive_data[i] - img_cond_data[i]);
991994 } else {
995+ // img_cfg_scale == cfg_scale: the img_cond terms of the 2-conditioning formula cancel, leaving regular CFG
992996 latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
993997 }
994998 }
999+ } else if (has_img_guidance){
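1000+ // img_cfg_scale == 1: the negative (unconditioned) terms of the 2-conditioning formula cancel, so guide from img_cond only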
1001+ latent_result = img_cond_data[i] + cfg_scale * (positive_data[i] - img_cond_data[i]);
9951002 }
9961003 if (is_skiplayer_step) {
9971004 latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale;
0 commit comments