Skip to content

Commit 8376dfb

Browse files
rmatif and leejet authored
feat: add sgm_uniform scheduler, simple scheduler, and support for NitroFusion (#675)
* feat: Add timestep shift and two new schedulers * update readme * fix spaces * format code * simplify SGMUniformSchedule * simplify shifted_timestep logic * avoid conflict --------- Co-authored-by: leejet <leejet714@gmail.com>
1 parent 0ebe6fe commit 8376dfb

File tree

5 files changed

+116
-9
lines changed

5 files changed

+116
-9
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,10 @@ arguments:
326326
--skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])
327327
--skip-layer-start START SLG enabling point: (default: 0.01)
328328
--skip-layer-end END SLG disabling point: (default: 0.2)
329-
--scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)
329+
--scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
330330
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
331331
sampling method (default: "euler" for Flux/SD3/Wan, "euler_a" otherwise)
332+
--timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
332333
--steps STEPS number of sample steps (default: 20)
333334
--high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)
334335
--high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
@@ -339,7 +340,7 @@ arguments:
339340
--high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])
340341
--high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)
341342
--high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)
342-
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)
343+
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
343344
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
344345
(high noise) sampling method (default: "euler_a")
345346
--high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)
@@ -352,7 +353,7 @@ arguments:
352353
--rng {std_default, cuda} RNG (default: cuda)
353354
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
354355
-b, --batch-count COUNT number of images to generate
355-
--clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
356+
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
356357
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
357358
--vae-tiling process vae in tiles to reduce memory usage
358359
--vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)

denoiser.hpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,25 @@ struct GITSSchedule : SigmaSchedule {
232232
}
233233
};
234234

235+
struct SGMUniformSchedule : SigmaSchedule {
236+
std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
237+
std::vector<float> result;
238+
if (n == 0) {
239+
result.push_back(0.0f);
240+
return result;
241+
}
242+
result.reserve(n + 1);
243+
int t_max = TIMESTEPS - 1;
244+
int t_min = 0;
245+
std::vector<float> timesteps = linear_space(static_cast<float>(t_max), static_cast<float>(t_min), n + 1);
246+
for (int i = 0; i < n; i++) {
247+
result.push_back(t_to_sigma_func(timesteps[i]));
248+
}
249+
result.push_back(0.0f);
250+
return result;
251+
}
252+
};
253+
235254
struct KarrasSchedule : SigmaSchedule {
236255
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
237256
// These *COULD* be function arguments here,
@@ -251,6 +270,35 @@ struct KarrasSchedule : SigmaSchedule {
251270
}
252271
};
253272

273+
struct SimpleSchedule : SigmaSchedule {
274+
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
275+
std::vector<float> result_sigmas;
276+
277+
if (n == 0) {
278+
return result_sigmas;
279+
}
280+
281+
result_sigmas.reserve(n + 1);
282+
283+
int model_sigmas_len = TIMESTEPS;
284+
285+
float step_factor = static_cast<float>(model_sigmas_len) / static_cast<float>(n);
286+
287+
for (uint32_t i = 0; i < n; ++i) {
288+
int offset_from_start_of_py_array = static_cast<int>(static_cast<float>(i) * step_factor);
289+
int timestep_index = model_sigmas_len - 1 - offset_from_start_of_py_array;
290+
291+
if (timestep_index < 0) {
292+
timestep_index = 0;
293+
}
294+
295+
result_sigmas.push_back(t_to_sigma(static_cast<float>(timestep_index)));
296+
}
297+
result_sigmas.push_back(0.0f);
298+
return result_sigmas;
299+
}
300+
};
301+
254302
// Close to Beta Schedule, but incredibly simple in code.
255303
struct SmoothStepSchedule : SigmaSchedule {
256304
static constexpr float smoothstep(float x) {

examples/cli/main.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,9 +248,10 @@ void print_usage(int argc, const char* argv[]) {
248248
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
249249
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
250250
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
251-
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
251+
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
252252
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
253253
printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
254+
printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
254255
printf(" --steps STEPS number of sample steps (default: 20)\n");
255256
printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
256257
printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
@@ -261,7 +262,7 @@ void print_usage(int argc, const char* argv[]) {
261262
printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
262263
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
263264
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
264-
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
265+
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
265266
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
266267
printf(" (high noise) sampling method (default: \"euler_a\")\n");
267268
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
@@ -274,7 +275,7 @@ void print_usage(int argc, const char* argv[]) {
274275
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
275276
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
276277
printf(" -b, --batch-count COUNT number of images to generate\n");
277-
printf(" --clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
278+
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
278279
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
279280
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
280281
printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
@@ -520,6 +521,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
520521
{"", "--chroma-t5-mask-pad", "", &params.chroma_t5_mask_pad},
521522
{"", "--video-frames", "", &params.video_frames},
522523
{"", "--fps", "", &params.fps},
524+
{"", "--timestep-shift", "", &params.sample_params.shifted_timestep},
523525
};
524526

525527
options.float_options = {
@@ -875,6 +877,11 @@ void parse_args(int argc, const char** argv, SDParams& params) {
875877
exit(1);
876878
}
877879

880+
if (params.sample_params.shifted_timestep < 0 || params.sample_params.shifted_timestep > 1000) {
881+
fprintf(stderr, "error: timestep-shift must be between 0 and 1000\n");
882+
exit(1);
883+
}
884+
878885
if (params.upscale_repeats < 1) {
879886
fprintf(stderr, "error: upscale multiplier must be at least 1\n");
880887
exit(1);

stable-diffusion.cpp

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,16 @@ class StableDiffusionGGML {
747747
denoiser->scheduler = std::make_shared<GITSSchedule>();
748748
denoiser->scheduler->version = version;
749749
break;
750+
case SGM_UNIFORM:
751+
LOG_INFO("Running with SGM Uniform schedule");
752+
denoiser->scheduler = std::make_shared<SGMUniformSchedule>();
753+
denoiser->scheduler->version = version;
754+
break;
755+
case SIMPLE:
756+
LOG_INFO("Running with Simple schedule");
757+
denoiser->scheduler = std::make_shared<SimpleSchedule>();
758+
denoiser->scheduler->version = version;
759+
break;
750760
case SMOOTHSTEP:
751761
LOG_INFO("Running with SmoothStep scheduler");
752762
denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
@@ -1033,6 +1043,7 @@ class StableDiffusionGGML {
10331043
float control_strength,
10341044
sd_guidance_params_t guidance,
10351045
float eta,
1046+
int shifted_timestep,
10361047
sample_method_t method,
10371048
const std::vector<float>& sigmas,
10381049
int start_merge_step,
@@ -1042,6 +1053,10 @@ class StableDiffusionGGML {
10421053
ggml_tensor* denoise_mask = NULL,
10431054
ggml_tensor* vace_context = NULL,
10441055
float vace_strength = 1.f) {
1056+
if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) {
1057+
LOG_WARN("timestep shifting is only supported for SDXL models!");
1058+
shifted_timestep = 0;
1059+
}
10451060
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);
10461061

10471062
float cfg_scale = guidance.txt_cfg;
@@ -1102,7 +1117,17 @@ class StableDiffusionGGML {
11021117
float c_in = scaling[2];
11031118

11041119
float t = denoiser->sigma_to_t(sigma);
1105-
std::vector<float> timesteps_vec(1, t); // [N, ]
1120+
std::vector<float> timesteps_vec;
1121+
if (shifted_timestep > 0 && sd_version_is_sdxl(version)) {
1122+
float shifted_t_float = t * (float(shifted_timestep) / float(TIMESTEPS));
1123+
int64_t shifted_t = static_cast<int64_t>(roundf(shifted_t_float));
1124+
shifted_t = std::max((int64_t)0, std::min((int64_t)(TIMESTEPS - 1), shifted_t));
1125+
LOG_DEBUG("shifting timestep from %.2f to %" PRId64 " (sigma: %.4f)", t, shifted_t, sigma);
1126+
timesteps_vec.assign(1, (float)shifted_t);
1127+
} else {
1128+
timesteps_vec.assign(1, t);
1129+
}
1130+
11061131
timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask);
11071132
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
11081133
std::vector<float> guidance_vec(1, guidance.distilled_guidance);
@@ -1200,6 +1225,19 @@ class StableDiffusionGGML {
12001225
float* vec_input = (float*)input->data;
12011226
float* positive_data = (float*)out_cond->data;
12021227
int ne_elements = (int)ggml_nelements(denoised);
1228+
1229+
if (shifted_timestep > 0 && sd_version_is_sdxl(version)) {
1230+
int64_t shifted_t_idx = static_cast<int64_t>(roundf(timesteps_vec[0]));
1231+
float shifted_sigma = denoiser->t_to_sigma((float)shifted_t_idx);
1232+
std::vector<float> shifted_scaling = denoiser->get_scalings(shifted_sigma);
1233+
float shifted_c_skip = shifted_scaling[0];
1234+
float shifted_c_out = shifted_scaling[1];
1235+
float shifted_c_in = shifted_scaling[2];
1236+
1237+
c_skip = shifted_c_skip * c_in / shifted_c_in;
1238+
c_out = shifted_c_out;
1239+
}
1240+
12031241
for (int i = 0; i < ne_elements; i++) {
12041242
float latent_result = positive_data[i];
12051243
if (has_unconditioned) {
@@ -1222,6 +1260,7 @@ class StableDiffusionGGML {
12221260
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
12231261
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
12241262
}
1263+
12251264
int64_t t1 = ggml_time_us();
12261265
if (step > 0) {
12271266
pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f);
@@ -1588,6 +1627,8 @@ const char* schedule_to_str[] = {
15881627
"exponential",
15891628
"ays",
15901629
"gits",
1630+
"sgm_uniform",
1631+
"simple",
15911632
"smoothstep",
15921633
};
15931634

@@ -1720,7 +1761,8 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
17201761
"scheduler: %s, "
17211762
"sample_method: %s, "
17221763
"sample_steps: %d, "
1723-
"eta: %.2f)",
1764+
"eta: %.2f, "
1765+
"shifted_timestep: %d)",
17241766
sample_params->guidance.txt_cfg,
17251767
sample_params->guidance.img_cfg,
17261768
sample_params->guidance.distilled_guidance,
@@ -1731,7 +1773,8 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
17311773
sd_schedule_name(sample_params->scheduler),
17321774
sd_sample_method_name(sample_params->sample_method),
17331775
sample_params->sample_steps,
1734-
sample_params->eta);
1776+
sample_params->eta,
1777+
sample_params->shifted_timestep);
17351778

17361779
return buf;
17371780
}
@@ -1863,6 +1906,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
18631906
int clip_skip,
18641907
sd_guidance_params_t guidance,
18651908
float eta,
1909+
int shifted_timestep,
18661910
int width,
18671911
int height,
18681912
enum sample_method_t sample_method,
@@ -2101,6 +2145,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
21012145
control_strength,
21022146
guidance,
21032147
eta,
2148+
shifted_timestep,
21042149
sample_method,
21052150
sigmas,
21062151
start_merge_step,
@@ -2394,6 +2439,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
23942439
sd_img_gen_params->clip_skip,
23952440
sd_img_gen_params->sample_params.guidance,
23962441
sd_img_gen_params->sample_params.eta,
2442+
sd_img_gen_params->sample_params.shifted_timestep,
23972443
width,
23982444
height,
23992445
sample_method,
@@ -2734,6 +2780,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
27342780
0,
27352781
sd_vid_gen_params->high_noise_sample_params.guidance,
27362782
sd_vid_gen_params->high_noise_sample_params.eta,
2783+
sd_vid_gen_params->high_noise_sample_params.shifted_timestep,
27372784
sd_vid_gen_params->high_noise_sample_params.sample_method,
27382785
high_noise_sigmas,
27392786
-1,
@@ -2769,6 +2816,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
27692816
0,
27702817
sd_vid_gen_params->sample_params.guidance,
27712818
sd_vid_gen_params->sample_params.eta,
2819+
sd_vid_gen_params->sample_params.shifted_timestep,
27722820
sd_vid_gen_params->sample_params.sample_method,
27732821
sigmas,
27742822
-1,

stable-diffusion.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ enum scheduler_t {
5858
EXPONENTIAL,
5959
AYS,
6060
GITS,
61+
SGM_UNIFORM,
62+
SIMPLE,
6163
SMOOTHSTEP,
6264
SCHEDULE_COUNT
6365
};
@@ -183,6 +185,7 @@ typedef struct {
183185
enum sample_method_t sample_method;
184186
int sample_steps;
185187
float eta;
188+
int shifted_timestep;
186189
} sd_sample_params_t;
187190

188191
typedef struct {

0 commit comments

Comments
 (0)