Skip to content

Commit 98558f9

Browse files
committed
update DPM++ and juggernaut-lightning
1 parent bca9a72 commit 98558f9

File tree

7 files changed

+411
-11
lines changed

7 files changed

+411
-11
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ For users who can connect to huggingface, please setting `LLAVA_CLIP_PATH, SDXL_
3939
* [SDXL base 1.0_0.9vae](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_base_1.0_0.9vae.safetensors)
4040
* [LLaVA CLIP](https://huggingface.co/openai/clip-vit-large-patch14-336)
4141
* [LLaVA v1.5 13B](https://huggingface.co/liuhaotian/llava-v1.5-13b)
42+
* (optional) [Juggernaut-XL_v9_RunDiffusionPhoto_v2](https://huggingface.co/RunDiffusion/Juggernaut-XL-v9/blob/main/Juggernaut-XL_v9_RunDiffusionPhoto_v2.safetensors)
43+
* Replacement for `SDXL base 1.0_0.9vae`, tuned for photo-realistic results
44+
* (optional) [Juggernaut_RunDiffusionPhoto2_Lightning_4Steps](https://huggingface.co/RunDiffusion/Juggernaut-XL-Lightning/blob/main/Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors)
45+
* Distilled model used in `SUPIR_v0_Juggernautv9_lightning.yaml`
4246

4347

4448
#### Models we provided:
@@ -109,6 +113,9 @@ CUDA_VISIBLE_DEVICES=0,1 python test.py --img_dir '/opt/data/private/LV_Dataset/
109113
```Shell
110114
CUDA_VISIBLE_DEVICES=0,1 python gradio_demo.py --ip 0.0.0.0 --port 6688 --use_image_slider --log_history
111115
116+
# Juggernaut_RunDiffusionPhoto2_Lightning_4Steps and DPM++ 2M SDE Karras for fast sampling
117+
CUDA_VISIBLE_DEVICES=0,1 python gradio_demo.py --ip 0.0.0.0 --port 6688 --use_image_slider --log_history --opt options/SUPIR_v0_Juggernautv9_lightning.yaml
118+
112119
# less VRAM & slower (12G for Diffusion, 16G for LLaVA)
113120
CUDA_VISIBLE_DEVICES=0,1 python gradio_demo.py --ip 0.0.0.0 --port 6688 --use_image_slider --log_history --loading_half_params --use_tile_vae --load_8bit_llava
114121
```

SUPIR/util.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def create_model(config_path):
3131
return model
3232

3333

34-
def create_SUPIR_model(config_path, SUPIR_sign=None):
34+
def create_SUPIR_model(config_path, SUPIR_sign=None, load_default_setting=False):
3535
config = OmegaConf.load(config_path)
3636
model = instantiate_from_config(config.model).cpu()
3737
print(f'Loaded model config from [{config_path}]')
@@ -45,6 +45,9 @@ def create_SUPIR_model(config_path, SUPIR_sign=None):
4545
model.load_state_dict(load_state_dict(config.SUPIR_CKPT_F), strict=False)
4646
elif SUPIR_sign == 'Q':
4747
model.load_state_dict(load_state_dict(config.SUPIR_CKPT_Q), strict=False)
48+
if load_default_setting:
49+
default_setting = config.default_setting
50+
return model, default_setting
4851
return model
4952

5053
def load_QF_ckpt(config_path):

gradio_demo.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import time
1616

1717
parser = argparse.ArgumentParser()
18+
parser.add_argument("--opt", type=str, default='options/SUPIR_v0.yaml')
1819
parser.add_argument("--ip", type=str, default='127.0.0.1')
1920
parser.add_argument("--port", type=int, default='6688')
2021
parser.add_argument("--no_llava", action='store_true', default=False)
@@ -40,15 +41,15 @@
4041
raise ValueError('Currently support CUDA only.')
4142

4243
# load SUPIR
43-
model = create_SUPIR_model('options/SUPIR_v0.yaml', SUPIR_sign='Q')
44+
model, default_setting = create_SUPIR_model(args.opt, SUPIR_sign='Q', load_default_setting=True)
4445
if args.loading_half_params:
4546
model = model.half()
4647
if args.use_tile_vae:
4748
model.init_tile_vae(encoder_tile_size=args.encoder_tile_size, decoder_tile_size=args.decoder_tile_size)
4849
model = model.to(SUPIR_device)
4950
model.first_stage_model.denoise_encoder_s1 = copy.deepcopy(model.first_stage_model.denoise_encoder)
5051
model.current_model = 'v0-Q'
51-
ckpt_Q, ckpt_F = load_QF_ckpt('options/SUPIR_v0.yaml')
52+
ckpt_Q, ckpt_F = load_QF_ckpt(args.opt)
5253

5354
# load LLaVA
5455
if use_llava:
@@ -144,7 +145,7 @@ def stage2_process(input_image, prompt, a_prompt, n_prompt, num_samples, upscale
144145

145146

146147
def load_and_reset(param_setting):
147-
edm_steps = 50
148+
edm_steps = default_setting.edm_steps
148149
s_stage2 = 1.0
149150
s_stage1 = -1.0
150151
s_churn = 5
@@ -160,11 +161,11 @@ def load_and_reset(param_setting):
160161
linear_s_stage2 = False
161162
linear_CFG = True
162163
if param_setting == "Quality":
163-
s_cfg = 7.5
164-
spt_linear_CFG = 4.0
164+
s_cfg = default_setting.s_cfg_Quality
165+
spt_linear_CFG = default_setting.spt_linear_CFG_Quality
165166
elif param_setting == "Fidelity":
166-
s_cfg = 4.0
167-
spt_linear_CFG = 1.0
167+
s_cfg = default_setting.s_cfg_Fidelity
168+
spt_linear_CFG = default_setting.spt_linear_CFG_Fidelity
168169
else:
169170
raise NotImplementedError
170171
return edm_steps, s_cfg, s_stage2, s_stage1, s_churn, s_noise, a_prompt, n_prompt, color_fix_type, linear_CFG, \
@@ -230,8 +231,9 @@ def submit_feedback(event_id, fb_score, fb_text):
230231
num_samples = gr.Slider(label="Num Samples", minimum=1, maximum=4 if not args.use_image_slider else 1
231232
, value=1, step=1)
232233
upscale = gr.Slider(label="Upscale", minimum=1, maximum=8, value=1, step=1)
233-
edm_steps = gr.Slider(label="Steps", minimum=20, maximum=200, value=50, step=1)
234-
s_cfg = gr.Slider(label="Text Guidance Scale", minimum=1.0, maximum=15.0, value=7.5, step=0.1)
234+
edm_steps = gr.Slider(label="Steps", minimum=1, maximum=200, value=default_setting.edm_steps, step=1)
235+
s_cfg = gr.Slider(label="Text Guidance Scale", minimum=1.0, maximum=15.0,
236+
value=default_setting.s_cfg_Quality, step=0.1)
235237
s_stage2 = gr.Slider(label="Stage2 Guidance Strength", minimum=0., maximum=1., value=1., step=0.05)
236238
s_stage1 = gr.Slider(label="Stage1 Guidance Strength", minimum=-1.0, maximum=6.0, value=-1.0, step=1.0)
237239
seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
@@ -251,7 +253,7 @@ def submit_feedback(event_id, fb_score, fb_text):
251253
with gr.Column():
252254
linear_CFG = gr.Checkbox(label="Linear CFG", value=True)
253255
spt_linear_CFG = gr.Slider(label="CFG Start", minimum=1.0,
254-
maximum=9.0, value=4.0, step=0.5)
256+
maximum=9.0, value=default_setting.spt_linear_CFG_Quality, step=0.5)
255257
with gr.Column():
256258
linear_s_stage2 = gr.Checkbox(label="Linear Stage2 Guidance", value=False)
257259
spt_linear_s_stage2 = gr.Slider(label="Guidance Start", minimum=0.,

options/SUPIR_v0.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,9 @@ SUPIR_CKPT_F: /opt/data/private/AIGC_pretrain/SUPIR_cache/SUPIR-v0F.ckpt
154154
SUPIR_CKPT_Q: /opt/data/private/AIGC_pretrain/SUPIR_cache/SUPIR-v0Q.ckpt
155155
SUPIR_CKPT: ~
156156

157+
default_setting:
158+
s_cfg_Quality: 7.5
159+
spt_linear_CFG_Quality: 4.0
160+
s_cfg_Fidelity: 4.0
161+
spt_linear_CFG_Fidelity: 1.0
162+
edm_steps: 50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
model:
2+
target: SUPIR.models.SUPIR_model.SUPIRModel
3+
params:
4+
ae_dtype: bf16
5+
diffusion_dtype: fp16
6+
scale_factor: 0.13025
7+
disable_first_stage_autocast: True
8+
network_wrapper: sgm.modules.diffusionmodules.wrappers.ControlWrapper
9+
10+
denoiser_config:
11+
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiserWithControl
12+
params:
13+
num_idx: 1000
14+
weighting_config:
15+
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
16+
scaling_config:
17+
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
18+
discretization_config:
19+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
20+
21+
control_stage_config:
22+
target: SUPIR.modules.SUPIR_v0.GLVControl
23+
params:
24+
adm_in_channels: 2816
25+
num_classes: sequential
26+
use_checkpoint: True
27+
in_channels: 4
28+
out_channels: 4
29+
model_channels: 320
30+
attention_resolutions: [4, 2]
31+
num_res_blocks: 2
32+
channel_mult: [1, 2, 4]
33+
num_head_channels: 64
34+
use_spatial_transformer: True
35+
use_linear_in_transformer: True
36+
transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
37+
# transformer_depth: [1, 1, 4]
38+
context_dim: 2048
39+
spatial_transformer_attn_type: softmax-xformers
40+
legacy: False
41+
input_upscale: 1
42+
43+
network_config:
44+
target: SUPIR.modules.SUPIR_v0.LightGLVUNet
45+
params:
46+
mode: XL-base
47+
project_type: ZeroSFT
48+
project_channel_scale: 2
49+
adm_in_channels: 2816
50+
num_classes: sequential
51+
use_checkpoint: True
52+
in_channels: 4
53+
out_channels: 4
54+
model_channels: 320
55+
attention_resolutions: [4, 2]
56+
num_res_blocks: 2
57+
channel_mult: [1, 2, 4]
58+
num_head_channels: 64
59+
use_spatial_transformer: True
60+
use_linear_in_transformer: True
61+
transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
62+
context_dim: 2048
63+
spatial_transformer_attn_type: softmax-xformers
64+
legacy: False
65+
66+
conditioner_config:
67+
target: sgm.modules.GeneralConditionerWithControl
68+
params:
69+
emb_models:
70+
# crossattn cond
71+
- is_trainable: False
72+
input_key: txt
73+
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
74+
params:
75+
layer: hidden
76+
layer_idx: 11
77+
# crossattn and vector cond
78+
- is_trainable: False
79+
input_key: txt
80+
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
81+
params:
82+
arch: ViT-bigG-14
83+
version: laion2b_s39b_b160k
84+
freeze: True
85+
layer: penultimate
86+
always_return_pooled: True
87+
legacy: False
88+
# vector cond
89+
- is_trainable: False
90+
input_key: original_size_as_tuple
91+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
92+
params:
93+
outdim: 256 # multiplied by two
94+
# vector cond
95+
- is_trainable: False
96+
input_key: crop_coords_top_left
97+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
98+
params:
99+
outdim: 256 # multiplied by two
100+
# vector cond
101+
- is_trainable: False
102+
input_key: target_size_as_tuple
103+
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
104+
params:
105+
outdim: 256 # multiplied by two
106+
107+
first_stage_config:
108+
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
109+
params:
110+
ckpt_path: ~
111+
embed_dim: 4
112+
monitor: val/rec_loss
113+
ddconfig:
114+
attn_type: vanilla-xformers
115+
double_z: true
116+
z_channels: 4
117+
resolution: 256
118+
in_channels: 3
119+
out_ch: 3
120+
ch: 128
121+
ch_mult: [ 1, 2, 4, 4 ]
122+
num_res_blocks: 2
123+
attn_resolutions: [ ]
124+
dropout: 0.0
125+
lossconfig:
126+
target: torch.nn.Identity
127+
128+
sampler_config:
129+
target: sgm.modules.diffusionmodules.sampling.RestoreDPMPP2MSampler
130+
params:
131+
num_steps: 100
132+
restore_cfg: 4.0
133+
s_churn: 0
134+
s_noise: 1.003
135+
discretization_config:
136+
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
137+
guider_config:
138+
target: sgm.modules.diffusionmodules.guiders.LinearCFG
139+
params:
140+
scale: 7.5
141+
scale_min: 4.0
142+
143+
p_p:
144+
'Cinematic, High Contrast, highly detailed, taken using a Canon EOS R camera,
145+
hyper detailed photo - realistic maximum detail, 32k, Color Grading, ultra HD, extreme meticulous detailing,
146+
skin pore detailing, hyper sharpness, perfect without deformations.'
147+
n_p:
148+
'painting, oil painting, illustration, drawing, art, sketch, oil painting, cartoon, CG Style, 3D render,
149+
unreal engine, blurring, dirty, messy, worst quality, low quality, frames, watermark, signature,
150+
jpeg artifacts, deformed, lowres, over-smooth'
151+
152+
SDXL_CKPT: /opt/data/private/AIGC_pretrain/SDXL_lightning_cache/Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors
153+
SUPIR_CKPT_F: /opt/data/private/AIGC_pretrain/SUPIR_cache/SUPIR-v0F.ckpt
154+
SUPIR_CKPT_Q: /opt/data/private/AIGC_pretrain/SUPIR_cache/SUPIR-v0Q.ckpt
155+
SUPIR_CKPT: ~
156+
157+
default_setting:
158+
s_cfg_Quality: 2.0
159+
spt_linear_CFG_Quality: 2.0
160+
s_cfg_Fidelity: 1.5
161+
spt_linear_CFG_Fidelity: 1.5
162+
edm_steps: 8

requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,5 @@ urllib3==1.26.15
3838
webdataset==0.2.48
3939
xformers>=0.0.20
4040
facexlib==0.3.0
41+
k-diffusion==0.1.1.post1
42+
diffusers==0.16.1

0 commit comments

Comments
 (0)