diff --git a/examples/cli/README.md b/examples/cli/README.md index 904f3c441..5fca0285e 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -109,7 +109,7 @@ Generation Options: medium --skip-layer-start SLG enabling point (default: 0.01) --skip-layer-end SLG disabling point (default: 0.2) - --eta eta in DDIM, only for DDIM and TCD (default: 0) + --eta noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a) --flow-shift shift value for Flow models like SD3.x or WAN (default: auto) --high-noise-cfg-scale (high noise) unconditional guidance scale: (default: 7.0) --high-noise-img-cfg-scale (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale) @@ -117,7 +117,7 @@ Generation Options: --high-noise-slg-scale (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0) --high-noise-skip-layer-start (high noise) SLG enabling point (default: 0.01) --high-noise-skip-layer-end (high noise) SLG disabling point (default: 0.2) - --high-noise-eta (high noise) eta in DDIM, only for DDIM and TCD (default: 0) + --high-noise-eta (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a) --strength strength for noising/unnoising (default: 0.75) --pm-style-strength --control-strength strength to apply Control Net (default: 0.9). 
1.0 corresponds to full destruction of information in init image diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 9389b03a3..29aa99c66 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -1197,7 +1197,7 @@ struct SDGenerationParams { &sample_params.guidance.slg.layer_end}, {"", "--eta", - "eta in DDIM, only for DDIM and TCD (default: 0)", + "noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)", &sample_params.eta}, {"", "--flow-shift", @@ -1229,7 +1229,7 @@ struct SDGenerationParams { &high_noise_sample_params.guidance.slg.layer_end}, {"", "--high-noise-eta", - "(high noise) eta in DDIM, only for DDIM and TCD (default: 0)", + "(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)", &high_noise_sample_params.eta}, {"", "--strength", diff --git a/examples/server/README.md b/examples/server/README.md index 8aa2158f5..9ba3dfd1f 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -189,7 +189,7 @@ Default Generation Options: medium --skip-layer-start SLG enabling point (default: 0.01) --skip-layer-end SLG disabling point (default: 0.2) - --eta eta in DDIM, only for DDIM and TCD (default: 0) + --eta noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a) --flow-shift shift value for Flow models like SD3.x or WAN (default: auto) --high-noise-cfg-scale (high noise) unconditional guidance scale: (default: 7.0) --high-noise-img-cfg-scale (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale) @@ -197,7 +197,7 @@ Default Generation Options: --high-noise-slg-scale (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0) --high-noise-skip-layer-start (high noise) SLG enabling point (default: 0.01) --high-noise-skip-layer-end (high noise) SLG disabling point (default: 
0.2) - --high-noise-eta (high noise) eta in DDIM, only for DDIM and TCD (default: 0) + --high-noise-eta (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a) --strength strength for noising/unnoising (default: 0.75) --pm-style-strength --control-strength strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image diff --git a/src/denoiser.hpp b/src/denoiser.hpp index b92ca4e3f..45bad35e8 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -761,1148 +761,1238 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser { typedef std::function denoise_cb_t; -// k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t -static bool sample_k_diffusion(sample_method_t method, - denoise_cb_t model, - ggml_context* work_ctx, - ggml_tensor* x, - std::vector sigmas, - std::shared_ptr rng, - float eta) { - size_t steps = sigmas.size() - 1; - // sample_euler_ancestral - switch (method) { - case EULER_A_SAMPLE_METHOD: { - ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - - // denoise - ggml_tensor* denoised = model(x, sigma, i + 1); - if (denoised == nullptr) { - return false; - } - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int i = 0; i < ggml_nelements(d); i++) { - vec_d[i] = (vec_x[i] - vec_denoised[i]) / sigma; - } - } - - // get_ancestral_step - float sigma_up = std::min(sigmas[i + 1], - std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); - float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); - - // Euler method - float dt = sigma_down - sigmas[i]; - // x = x + d * dt - { - float* vec_d = (float*)d->data; - 
float* vec_x = (float*)x->data; - - for (int i = 0; i < ggml_nelements(x); i++) { - vec_x[i] = vec_x[i] + vec_d[i] * dt; - } - } - - if (sigmas[i + 1] > 0) { - // x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up - ggml_ext_im_set_randn_f32(noise, rng); - // noise = load_tensor_from_file(work_ctx, "./rand" + std::to_string(i+1) + ".bin"); - { - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int i = 0; i < ggml_nelements(x); i++) { - vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; - } - } - } +static void generate_ancestral_step(float& sigma_up, float& sigma_down, float sigma_from, float sigma_to, float eta = 1.0f) { + // sigma_up = min(sigma_to, eta * √(sigma_to² * (sigma_from² - sigma_to²) / sigma_from²)) + // sigma_down = √(sigma_to² - sigma_up²) + sigma_up = 0.0f; + sigma_down = sigma_to; + if (eta > 0.0f) { + float sigma_from_sq = sigma_from * sigma_from; + float sigma_to_sq = sigma_to * sigma_to; + if (sigma_from_sq > 0.0f) { + float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq; + if (term > 0.0f) { + sigma_up = eta * std::sqrt(term); } - } break; - case EULER_SAMPLE_METHOD: // Implemented without any sigma churn - { - ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - - // denoise - ggml_tensor* denoised = model(x, sigma, i + 1); - if (denoised == nullptr) { - return false; - } - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(d); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigma; - } - } - - float dt = sigmas[i + 1] - sigma; - // x = x + d * dt - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } - } - } break; - case HEUN_SAMPLE_METHOD: { - ggml_tensor* d = 
ggml_dup_tensor(work_ctx, x); - ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // denoise - ggml_tensor* denoised = model(x, sigmas[i], -(i + 1)); - if (denoised == nullptr) { - return false; - } - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; - } - } - - float dt = sigmas[i + 1] - sigmas[i]; - if (sigmas[i + 1] == 0) { - // Euler step - // x = x + d * dt - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } else { - // Heun step - float* vec_d = (float*)d->data; - float* vec_d2 = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_x2 = (float*)x2->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x2[j] = vec_x[j] + vec_d[j] * dt; - } - - ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1); - if (denoised == nullptr) { - return false; - } - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - float d2 = (vec_x2[j] - vec_denoised[j]) / sigmas[i + 1]; - vec_d[j] = (vec_d[j] + d2) / 2; - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } - } - } break; - case DPM2_SAMPLE_METHOD: { - ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // denoise - ggml_tensor* denoised = model(x, sigmas[i], -(i + 1)); - if (denoised == nullptr) { - return false; - } - - // d = (x - denoised) / sigma - { - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_d[j] = (vec_x[j] - vec_denoised[j]) / sigmas[i]; - } - } - - if (sigmas[i + 1] == 0) { - // Euler step - // x = 
x + d * dt - float dt = sigmas[i + 1] - sigmas[i]; - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_d[j] * dt; - } - } else { - // DPM-Solver-2 - float sigma_mid = exp(0.5f * (log(sigmas[i]) + log(sigmas[i + 1]))); - float dt_1 = sigma_mid - sigmas[i]; - float dt_2 = sigmas[i + 1] - sigmas[i]; - - float* vec_d = (float*)d->data; - float* vec_x = (float*)x->data; - float* vec_x2 = (float*)x2->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x2[j] = vec_x[j] + vec_d[j] * dt_1; - } - - ggml_tensor* denoised = model(x2, sigma_mid, i + 1); - if (denoised == nullptr) { - return false; - } - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - float d2 = (vec_x2[j] - vec_denoised[j]) / sigma_mid; - vec_x[j] = vec_x[j] + d2 * dt_2; - } - } + } + sigma_up = std::min(sigma_up, sigma_to); + float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up; + sigma_down = sigma_down_sq > 0.0f ? 
std::sqrt(sigma_down_sq) : 0.0f; + } +} + +static void generate_rf_ancestral_step(float& sigma_down, float& scale_factor, float& renoise_coeff, float sigma_from, float sigma_to, float eta) { + // downstep_ratio = 1 + (sigma_to / sigma_from - 1) × eta + // sigma_down = sigma_to × downstep_ratio + // scale_factor = (1 - sigma_to) / (1 - sigma_down) + // renoise_coeff = √(sigma_to² - sigma_down² × scale_factor²) + + float downstep_ratio = 1.0f + (sigma_to / sigma_from - 1.0f) * eta; + sigma_down = sigma_to * downstep_ratio; + + float alpha_ip1 = 1.0f - sigma_to; + float alpha_down = 1.0f - sigma_down; + scale_factor = alpha_ip1 / alpha_down; + + float ratio = downstep_ratio * scale_factor; + if (ratio >= 1.0f) { + renoise_coeff = 0.0f; + } else { + renoise_coeff = sigma_to * std::sqrt((1.0f + ratio) * (1.0 - ratio)); + } +} + +template +void denoiser_tensor_iter( + ggml_tensor* a, + Callable&& fn) { + for (int64_t i = 0; i < ggml_nelements(a); i++) { + fn(static_cast(a->data)[i]); + } +} + +template +void denoiser_tensor_iter( + ggml_tensor* a, + ggml_tensor* b, + Callable&& fn) { + for (int64_t i = 0; i < ggml_nelements(a); i++) { + fn(static_cast(a->data)[i], static_cast(b->data)[i]); + } +} + +template +void denoiser_tensor_iter( + ggml_tensor* a, + ggml_tensor* b, + ggml_tensor* c, + Callable&& fn) { + for (int64_t i = 0; i < ggml_nelements(a); i++) { + fn(static_cast(a->data)[i], static_cast(b->data)[i], + static_cast(c->data)[i]); + } +} + +template +void denoiser_tensor_iter( + ggml_tensor* a, + ggml_tensor* b, + ggml_tensor* c, + ggml_tensor* d, + Callable&& fn) { + for (int64_t i = 0; i < ggml_nelements(a); i++) { + fn(static_cast(a->data)[i], static_cast(b->data)[i], + static_cast(c->data)[i], static_cast(d->data)[i]); + } +} + +template +void denoiser_tensor_iter( + ggml_tensor* a, + ggml_tensor* b, + ggml_tensor* c, + ggml_tensor* d, + ggml_tensor* e, + Callable&& fn) { + for (int64_t i = 0; i < ggml_nelements(a); i++) { + fn(static_cast(a->data)[i], 
static_cast(b->data)[i], + static_cast(c->data)[i], static_cast(d->data)[i], + static_cast(e->data)[i]); + } +} + +template +void denoiser_tensor_iter( + ggml_tensor* a, + ggml_tensor* b, + ggml_tensor* c, + ggml_tensor* d, + ggml_tensor* e, + ggml_tensor* f, + Callable&& fn) { + for (int64_t i = 0; i < ggml_nelements(a); i++) { + fn(static_cast(a->data)[i], static_cast(b->data)[i], + static_cast(c->data)[i], static_cast(d->data)[i], + static_cast(e->data)[i], static_cast(f->data)[i]); + } +} + +static bool euler_a_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1); + if (denoised == nullptr) { + return false; + } + + denoiser_tensor_iter(d, x, denoised, [sigma](float& d, float& x, float& denoised) { + d = (x - denoised) / sigma; + }); + + // get_ancestral_step + float sigma_up, sigma_down; + generate_ancestral_step(sigma_up, sigma_down, sigma, sigmas[i + 1], eta); + + // Euler method + float dt = sigma_down - sigma; + denoiser_tensor_iter(x, d, [dt](float& x, const float& d) { + x = x + d * dt; + }); + + if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [sigma_up](float& x, const float& noise) { + x = x + noise * sigma_up; + }); + } + } + return true; +} + +static bool euler_a_rf_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + ggml_tensor* denoised = model(x, sigma, 
i + 1); + if (denoised == nullptr) { + return false; + } + + if (sigmas[i + 1] == 0.0f) { + copy_ggml_tensor(x, denoised); + } else { + + float sigma_down, scale_factor, renoise_coeff; + generate_rf_ancestral_step(sigma_down, scale_factor, renoise_coeff, + sigma, sigmas[i + 1], eta); + float sigma_down_i_ratio = sigma_down / sigma; + denoiser_tensor_iter(x, denoised, [sigma_down_i_ratio](float& x, const float& denoised) { + x = sigma_down_i_ratio * x + (1.0f - sigma_down_i_ratio) * denoised; + }); + + if (eta > 0.0f) { + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [scale_factor, renoise_coeff](float& x, const float& noise) { + x = scale_factor * x + noise * renoise_coeff; + }); } + } + } + return true; +} + +static bool euler_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas) { + size_t steps = sigmas.size() - 1; + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1); + if (denoised == nullptr) { + return false; + } + + denoiser_tensor_iter(d, x, denoised, [sigma](float& d, const float& x, const float& denoised) { + d = (x - denoised) / sigma; + }); - } break; - case DPMPP2S_A_SAMPLE_METHOD: { - ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // denoise - ggml_tensor* denoised = model(x, sigmas[i], -(i + 1)); - if (denoised == nullptr) { - return false; - } - - // get_ancestral_step - float sigma_up = std::min(sigmas[i + 1], - std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); - float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); - auto t_fn = [](float sigma) -> float { return -log(sigma); }; - auto sigma_fn = [](float t) -> float { return exp(-t); }; - - if (sigma_down == 0) { 
- // d = (x - denoised) / sigmas[i]; - // dt = sigma_down - sigmas[i]; - // x += d * dt; - // => x = denoised - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_denoised[j]; - } - } else { - // DPM-Solver++(2S) - float t = t_fn(sigmas[i]); - float t_next = t_fn(sigma_down); - float h = t_next - t; - float s = t + 0.5f * h; - - float* vec_x = (float*)x->data; - float* vec_x2 = (float*)x2->data; - float* vec_denoised = (float*)denoised->data; - - // First half-step - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x2[j] = (sigma_fn(s) / sigma_fn(t)) * vec_x[j] - (exp(-h * 0.5f) - 1) * vec_denoised[j]; - } - - ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1); - if (denoised == nullptr) { - return false; - } - - // Second half-step - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = (sigma_fn(t_next) / sigma_fn(t)) * vec_x[j] - (exp(-h) - 1) * vec_denoised[j]; - } - } - - // Noise addition - if (sigmas[i + 1] > 0) { - ggml_ext_im_set_randn_f32(noise, rng); - { - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int i = 0; i < ggml_nelements(x); i++) { - vec_x[i] = vec_x[i] + vec_noise[i] * sigma_up; - } - } - } + float dt = sigmas[i + 1] - sigma; + denoiser_tensor_iter(x, d, [dt](float& x, const float& d) { + x = x + d * dt; + }); + } + return true; +} + +static bool heun_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas) { + size_t steps = sigmas.size() - 1; + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + float sigma_to = sigmas[i + 1]; + + // denoise + ggml_tensor* denoised = model(x, sigma, -(i + 1)); + if (denoised == nullptr) { + return false; + } + + denoiser_tensor_iter(d, x, denoised, [sigma](float& d, const float& x, const float& denoised) { + d = (x - 
denoised) / sigma; + }); + + float dt = sigma_to - sigma; + if (sigma_to == 0) { + // Euler step + denoiser_tensor_iter(x, d, [dt](float& x, const float& d) { + x = x + d * dt; + }); + } else { + // Heun step + denoiser_tensor_iter(x2, x, d, [dt](float& x2, const float& x, const float& d) { + x2 = x + d * dt; + }); + + ggml_tensor* denoised = model(x2, sigma_to, i + 1); + if (denoised == nullptr) { + return false; } - } break; - case DPMPP2M_SAMPLE_METHOD: // DPM++ (2M) from Karras et al (2022) - { - ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); - - auto t_fn = [](float sigma) -> float { return -log(sigma); }; - - for (int i = 0; i < steps; i++) { - // denoise - ggml_tensor* denoised = model(x, sigmas[i], i + 1); - if (denoised == nullptr) { - return false; - } - - float t = t_fn(sigmas[i]); - float t_next = t_fn(sigmas[i + 1]); - float h = t_next - t; - float a = sigmas[i + 1] / sigmas[i]; - float b = exp(-h) - 1.f; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - float* vec_old_denoised = (float*)old_denoised->data; - - if (i == 0 || sigmas[i + 1] == 0) { - // Simpler step for the edge cases - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; - } - } else { - float h_last = t - t_fn(sigmas[i - 1]); - float r = h_last / h; - for (int j = 0; j < ggml_nelements(x); j++) { - float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; - vec_x[j] = a * vec_x[j] - b * denoised_d; - } - } - - // old_denoised = denoised - for (int j = 0; j < ggml_nelements(x); j++) { - vec_old_denoised[j] = vec_denoised[j]; - } + + denoiser_tensor_iter(d, x, x2, denoised, + [sigma_to, dt](float& d, float& x, const float& x2, const float& denoised) { + float d2 = (x2 - denoised) / sigma_to; + d = (d + d2) / 2; + x = x + d * dt; + }); + } + } + return true; +} + +static bool dpm2_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, 
std::vector sigmas) { + size_t steps = sigmas.size() - 1; + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + float sigma_to = sigmas[i + 1]; + // denoise + ggml_tensor* denoised = model(x, sigma, -(i + 1)); + if (denoised == nullptr) { + return false; + } + + denoiser_tensor_iter(d, x, denoised, [sigma](float& d, const float& x, const float& denoised) { + d = (x - denoised) / sigma; + }); + + if (sigma_to == 0) { + // Euler step + float dt = -sigma; + denoiser_tensor_iter(x, d, [dt](float& x, const float& d) { + x = x + d * dt; + }); + } else { + // DPM-Solver-2 + float sigma_mid = exp(0.5f * (log(sigma) + log(sigma_to))); + float dt_1 = sigma_mid - sigma; + float dt_2 = sigma_to - sigma; + + denoiser_tensor_iter(x2, x, d, + [dt_1](float& x2, const float& x, const float& d) { + x2 = x + d * dt_1; + }); + + ggml_tensor* denoised = model(x2, sigma_mid, i + 1); + if (denoised == nullptr) { + return false; } - } break; - case DPMPP2Mv2_SAMPLE_METHOD: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 - { - ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); - - auto t_fn = [](float sigma) -> float { return -log(sigma); }; - - for (int i = 0; i < steps; i++) { - // denoise - ggml_tensor* denoised = model(x, sigmas[i], i + 1); - if (denoised == nullptr) { - return false; - } - - float t = t_fn(sigmas[i]); - float t_next = t_fn(sigmas[i + 1]); - float h = t_next - t; - float a = sigmas[i + 1] / sigmas[i]; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - float* vec_old_denoised = (float*)old_denoised->data; - - if (i == 0 || sigmas[i + 1] == 0) { - // Simpler step for the edge cases - float b = exp(-h) - 1.f; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = a * vec_x[j] - b * vec_denoised[j]; - } - } else { - float h_last = t - t_fn(sigmas[i - 1]); - float 
h_min = std::min(h_last, h); - float h_max = std::max(h_last, h); - float r = h_max / h_min; - float h_d = (h_max + h_min) / 2.f; - float b = exp(-h_d) - 1.f; - for (int j = 0; j < ggml_nelements(x); j++) { - float denoised_d = (1.f + 1.f / (2.f * r)) * vec_denoised[j] - (1.f / (2.f * r)) * vec_old_denoised[j]; - vec_x[j] = a * vec_x[j] - b * denoised_d; - } - } - - // old_denoised = denoised - for (int j = 0; j < ggml_nelements(x); j++) { - vec_old_denoised[j] = vec_denoised[j]; - } + denoiser_tensor_iter(x, denoised, x2, + [sigma_mid, dt_2](float& x, const float& denoised, const float& x2) { + float d2 = (x2 - denoised) / sigma_mid; + x = x + d2 * dt_2; + }); + } + } + return true; +} + +static bool dpmpp2s_a_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], -(i + 1)); + if (denoised == nullptr) { + return false; + } + + // get_ancestral_step + float sigma_up, sigma_down; + generate_ancestral_step(sigma_up, sigma_down, sigmas[i], sigmas[i + 1], eta); + + if (sigma_down == 0) { + // d = (x - denoised) / sigmas[i]; + // dt = sigma_down - sigmas[i]; + // x += d * dt; + // => x = denoised + copy_ggml_tensor(x, denoised); + } else { + // DPM-Solver++(2S) + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + auto sigma_fn = [](float t) -> float { return exp(-t); }; + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigma_down); + float h = t_next - t; + float s = t + 0.5f * h; + + // First half-step + denoiser_tensor_iter(x2, x, denoised, + [sigma_fn, s, t, h](float& x2, const float& x, const float& denoised) { + x2 = (sigma_fn(s) / sigma_fn(t)) * x - (exp(-h * 0.5f) - 1) * denoised; + }); + + ggml_tensor* denoised = model(x2, sigmas[i + 1], i + 1); 
+ if (denoised == nullptr) { + return false; } - } break; - case IPNDM_SAMPLE_METHOD: // iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main - { - int max_order = 4; - ggml_tensor* x_next = x; - std::vector buffer_model; - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - float sigma_next = sigmas[i + 1]; - - ggml_tensor* x_cur = x_next; - float* vec_x_cur = (float*)x_cur->data; - float* vec_x_next = (float*)x_next->data; - - // Denoising step - ggml_tensor* denoised = model(x_cur, sigma, i + 1); - if (denoised == nullptr) { - return false; - } - float* vec_denoised = (float*)denoised->data; - // d_cur = (x_cur - denoised) / sigma - ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur); - float* vec_d_cur = (float*)d_cur->data; - - for (int j = 0; j < ggml_nelements(d_cur); j++) { - vec_d_cur[j] = (vec_x_cur[j] - vec_denoised[j]) / sigma; - } - - int order = std::min(max_order, i + 1); - - // Calculate vec_x_next based on the order - switch (order) { - case 1: // First Euler step - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * vec_d_cur[j]; - } - break; - - case 2: // Use one history point - { - float* vec_d_prev1 = (float*)buffer_model.back()->data; - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (3 * vec_d_cur[j] - vec_d_prev1[j]) / 2; - } - } break; - - case 3: // Use two history points - { - float* vec_d_prev1 = (float*)buffer_model.back()->data; - float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data; - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (23 * vec_d_cur[j] - 16 * vec_d_prev1[j] + 5 * vec_d_prev2[j]) / 12; - } - } break; - - case 4: // Use three history points - { - float* vec_d_prev1 = (float*)buffer_model.back()->data; - float* vec_d_prev2 = (float*)buffer_model[buffer_model.size() - 2]->data; - float* 
vec_d_prev3 = (float*)buffer_model[buffer_model.size() - 3]->data; - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x_next[j] = vec_x_cur[j] + (sigma_next - sigma) * (55 * vec_d_cur[j] - 59 * vec_d_prev1[j] + 37 * vec_d_prev2[j] - 9 * vec_d_prev3[j]) / 24; - } - } break; - } - - // Manage buffer_model - if (buffer_model.size() == max_order - 1) { - // Shift elements to the left - for (int k = 0; k < max_order - 2; k++) { - buffer_model[k] = buffer_model[k + 1]; - } - buffer_model.back() = d_cur; // Replace the last element with d_cur - } else { - buffer_model.push_back(d_cur); - } + + // Second half-step + denoiser_tensor_iter(x, denoised, + [sigma_fn, t_next, t, h](float& x, const float& denoised) { + x = (sigma_fn(t_next) / sigma_fn(t)) * x - (exp(-h) - 1) * denoised; + }); + } + + // Noise addition + if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [sigma_up](float& x, const float& noise) { + x = x + noise * sigma_up; + }); + } + } + return true; +} + +static bool dpmpp2s_a_rf_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + + size_t steps = sigmas.size() - 1; + + // Allocate working tensors + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + ggml_tensor* u = ggml_dup_tensor(work_ctx, x); + + auto lambda_fn = [](float sigma) -> float { + return logf((1.0f - sigma) / sigma); + }; + + for (int i = 0; i < steps; i++) { + + float sigma = sigmas[i]; + float sigma_to = sigmas[i + 1]; + + ggml_tensor* denoised = model(x, sigma, -(i + 1)); + if (denoised == nullptr) { + return false; + } + + if (sigma_to == 0) { + copy_ggml_tensor(x, denoised); + } else { + float sigma_down, scale_factor, renoise_coeff; + generate_rf_ancestral_step(sigma_down, scale_factor, renoise_coeff, sigma, sigma_to, eta); + float sigma_s; + + if (sigma == 1.0f) { + // Avoid log(0) when 
sigma = 1.0 + sigma_s = 0.9999f; + } else { + float t_i = lambda_fn(sigma); + float t_down = lambda_fn(sigma_down); + float h = t_down - t_i; + float s = t_i + 0.5f * h; + sigma_s = 1.0f / (exp(s) + 1.0f); } - } break; - case IPNDM_V_SAMPLE_METHOD: // iPNDM_v sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main - { - int max_order = 4; - std::vector buffer_model; - ggml_tensor* x_next = x; - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - float t_next = sigmas[i + 1]; - - // Denoising step - ggml_tensor* denoised = model(x, sigma, i + 1); - float* vec_denoised = (float*)denoised->data; - ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x); - float* vec_d_cur = (float*)d_cur->data; - float* vec_x = (float*)x->data; - - // d_cur = (x - denoised) / sigma - for (int j = 0; j < ggml_nelements(d_cur); j++) { - vec_d_cur[j] = (vec_x[j] - vec_denoised[j]) / sigma; - } - - int order = std::min(max_order, i + 1); - float h_n = t_next - sigma; - float h_n_1 = (i > 0) ? (sigma - sigmas[i - 1]) : h_n; - - switch (order) { - case 1: // First Euler step - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x[j] += vec_d_cur[j] * h_n; - } - break; - - case 2: { - float* vec_d_prev1 = (float*)buffer_model.back()->data; - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x[j] += h_n * ((2 + (h_n / h_n_1)) * vec_d_cur[j] - (h_n / h_n_1) * vec_d_prev1[j]) / 2; - } - break; - } - - case 3: { - float h_n_2 = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1; - float* vec_d_prev1 = (float*)buffer_model.back()->data; - float* vec_d_prev2 = (buffer_model.size() > 1) ? (float*)buffer_model[buffer_model.size() - 2]->data : vec_d_prev1; - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x[j] += h_n * ((23 * vec_d_cur[j] - 16 * vec_d_prev1[j] + 5 * vec_d_prev2[j]) / 12); - } - break; - } - - case 4: { - float h_n_2 = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1; - float h_n_3 = (i > 2) ? 
(sigmas[i - 2] - sigmas[i - 3]) : h_n_2; - float* vec_d_prev1 = (float*)buffer_model.back()->data; - float* vec_d_prev2 = (buffer_model.size() > 1) ? (float*)buffer_model[buffer_model.size() - 2]->data : vec_d_prev1; - float* vec_d_prev3 = (buffer_model.size() > 2) ? (float*)buffer_model[buffer_model.size() - 3]->data : vec_d_prev2; - for (int j = 0; j < ggml_nelements(x_next); j++) { - vec_x[j] += h_n * ((55 * vec_d_cur[j] - 59 * vec_d_prev1[j] + 37 * vec_d_prev2[j] - 9 * vec_d_prev3[j]) / 24); - } - break; - } - } - - // Manage buffer_model - if (buffer_model.size() == max_order - 1) { - buffer_model.erase(buffer_model.begin()); - } - buffer_model.push_back(d_cur); - - // Prepare the next d tensor - d_cur = ggml_dup_tensor(work_ctx, x_next); + + float sigma_s_i_ratio = sigma_s / sigma; + denoiser_tensor_iter(u, x, denoised, + [sigma_s_i_ratio](float& u, const float& x, const float& denoised) { + u = sigma_s_i_ratio * x + (1.0f - sigma_s_i_ratio) * denoised; + }); + + // Second denoise step with u + ggml_tensor* D_i = model(u, sigma_s, i); + if (D_i == nullptr) { + return false; } - } break; - case LCM_SAMPLE_METHOD: // Latent Consistency Models - { - ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - ggml_tensor* d = ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - - // denoise - ggml_tensor* denoised = model(x, sigma, i + 1); - if (denoised == nullptr) { - return false; - } - - // x = denoised - { - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_denoised[j]; - } - } - - if (sigmas[i + 1] > 0) { - // x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1]) - ggml_ext_im_set_randn_f32(noise, rng); - // noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin"); - { - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - 
vec_x[j] = vec_x[j] + sigmas[i + 1] * vec_noise[j]; - } - } - } + + float sigma_down_i_ratio = sigma_down / sigma; + denoiser_tensor_iter(x, D_i, [sigma_down_i_ratio](float& x, const float& D_i) { + x = sigma_down_i_ratio * x + (1.0f - sigma_down_i_ratio) * D_i; + }); + + if (sigmas[i + 1] > 0 && eta > 0) { + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [scale_factor, renoise_coeff](float& x, const float& noise) { + x = scale_factor * x + noise * renoise_coeff; + }); } - } break; - case DDIM_TRAILING_SAMPLE_METHOD: // Denoising Diffusion Implicit Models - // with the "trailing" timestep spacing - { - // See J. Song et al., "Denoising Diffusion Implicit - // Models", arXiv:2010.02502 [cs.LG] - // - // DDIM itself needs alphas_cumprod (DDPM, J. Ho et al., - // arXiv:2006.11239 [cs.LG] with k-diffusion's start and - // end beta) (which unfortunately k-diffusion's data - // structure hides from the denoiser), and the sigmas are - // also needed to invert the behavior of CompVisDenoiser - // (k-diffusion's LMSDiscreteSchedulerr) - float beta_start = 0.00085f; - float beta_end = 0.0120f; - std::vector alphas_cumprod; - std::vector compvis_sigmas; - - alphas_cumprod.reserve(TIMESTEPS); - compvis_sigmas.reserve(TIMESTEPS); - for (int i = 0; i < TIMESTEPS; i++) { - alphas_cumprod[i] = - (i == 0 ? 
1.0f : alphas_cumprod[i - 1]) * - (1.0f - - std::pow(sqrtf(beta_start) + - (sqrtf(beta_end) - sqrtf(beta_start)) * - ((float)i / (TIMESTEPS - 1)), - 2)); - compvis_sigmas[i] = - std::sqrt((1 - alphas_cumprod[i]) / - alphas_cumprod[i]); + } + + } + + return true; +} + + +// DPM++ (2M) from Karras et al (2022) +static bool dpmpp2m_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas) { + size_t steps = sigmas.size() - 1; + ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + if (denoised == nullptr) { + return false; + } + + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigmas[i + 1]); + float h = t_next - t; + float a = sigmas[i + 1] / sigmas[i]; + float b = exp(-h) - 1.f; + + if (i == 0 || sigmas[i + 1] == 0) { + // Simpler step for the edge cases + denoiser_tensor_iter(x, denoised, [a, b](float& x, const float& denoised) { + x = a * x - b * denoised; + }); + } else { + float h_last = t - t_fn(sigmas[i - 1]); + float r = h_last / h; + denoiser_tensor_iter(x, denoised, old_denoised, + [r, a, b](float& x, const float& denoised, const float& old_denoised) { + float denoised_d = (1.f + 1.f / (2.f * r)) * denoised - (1.f / (2.f * r)) * old_denoised; + x = a * x - b * denoised_d; + }); + } + + copy_ggml_tensor(old_denoised, denoised); + } + return true; +} + +// Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 +static bool dpmpp2mv2_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas) { + size_t steps = sigmas.size() - 1; + ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + auto t_fn = [](float sigma) -> float { return -log(sigma); }; + + for (int i = 0; i < steps; i++) { + // denoise + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + if (denoised == 
nullptr) { + return false; + } + + float t = t_fn(sigmas[i]); + float t_next = t_fn(sigmas[i + 1]); + float h = t_next - t; + float a = sigmas[i + 1] / sigmas[i]; + + if (i == 0 || sigmas[i + 1] == 0) { + // Simpler step for the edge cases + float b = exp(-h) - 1.f; + denoiser_tensor_iter(x, denoised, [a, b](float& x, const float& denoised) { + x = a * x - b * denoised; + }); + } else { + float h_last = t - t_fn(sigmas[i - 1]); + float h_min = std::min(h_last, h); + float h_max = std::max(h_last, h); + float r = h_max / h_min; + float h_d = (h_max + h_min) / 2.f; + float b = exp(-h_d) - 1.f; + denoiser_tensor_iter(x, denoised, old_denoised, + [r, a, b](float& x, const float& denoised, const float& old_denoised) { + float denoised_d = (1.f + 1.f / (2.f * r)) * denoised - (1.f / (2.f * r)) * old_denoised; + x = a * x - b * denoised_d; + }); + } + + copy_ggml_tensor(old_denoised, denoised); + } + return true; +} +// iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main +static bool ipndm_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas) { + size_t steps = sigmas.size() - 1; + int max_order = 4; + ggml_tensor* x_next = x; + std::vector buffer_model; + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + float sigma_next = sigmas[i + 1]; + + ggml_tensor* x_cur = x_next; + float* vec_x_cur = (float*)x_cur->data; + float* vec_x_next = (float*)x_next->data; + + // Denoising step + ggml_tensor* denoised = model(x_cur, sigma, i + 1); + if (denoised == nullptr) { + return false; + } + + ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x_cur); + denoiser_tensor_iter(d_cur, x_cur, denoised, + [sigma](float& d_cur, const float& x_cur, const float& denoised) { + d_cur = (x_cur - denoised) / sigma; + }); + + int order = std::min(max_order, i + 1); + + // Calculate vec_x_next based on the order + switch (order) { + case 1: // First Euler step + denoiser_tensor_iter(x_next, x_cur, d_cur, + 
[sigma_next, sigma](float& x_next, const float& x_cur, const float& d_cur) { + x_next = x_cur + (sigma_next - sigma) * d_cur; + }); + break; + + case 2: // Use one history point + denoiser_tensor_iter(x_next, x_cur, d_cur, buffer_model.back(), + [sigma_next, sigma](float& x_next, const float& x_cur, const float& d_cur, const float& d_prev1) { + x_next = x_cur + (sigma_next - sigma) * (3 * d_cur - d_prev1) / 2; + }); + break; + + case 3: // Use two history points + denoiser_tensor_iter(x_next, x_cur, d_cur, buffer_model.back(), + buffer_model[buffer_model.size() - 2], + [sigma_next, sigma](float& x_next, const float& x_cur, const float& d_cur, const float& d_prev1, const float& d_prev2) { + x_next = x_cur + (sigma_next - sigma) * (23 * d_cur - 16 * d_prev1 + 5 * d_prev2) / 12; + }); + break; + + case 4: // Use three history points + denoiser_tensor_iter(x_next, x_cur, d_cur, buffer_model.back(), + buffer_model[buffer_model.size() - 2], buffer_model[buffer_model.size() - 3], + [sigma_next, sigma](float& x_next, const float& x_cur, const float& d_cur, const float& d_prev1, const float& d_prev2, const float& d_prev3) { + x_next = x_cur + (sigma_next - sigma) * (55 * d_cur - 59 * d_prev1 + 37 * d_prev2 - 9 * d_prev3) / 24; + }); + break; + } + + // Manage buffer_model + if (buffer_model.size() == max_order - 1) { + // Shift elements to the left + for (int k = 0; k < max_order - 2; k++) { + buffer_model[k] = buffer_model[k + 1]; } + buffer_model.back() = d_cur; // Replace the last element with d_cur + } else { + buffer_model.push_back(d_cur); + } + } - ggml_tensor* pred_original_sample = - ggml_dup_tensor(work_ctx, x); - ggml_tensor* variance_noise = - ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // The "trailing" DDIM timestep, see S. Lin et al., - // "Common Diffusion Noise Schedulers and Sample Steps - // are Flawed", arXiv:2305.08891 [cs], p. 4, Table - // 2. Most variables below follow Diffusers naming - // - // Diffuser naming vs. 
Song et al. (2010), p. 5, (12) - // and p. 16, (16) ( -> ): - // - // - pred_noise_t -> epsilon_theta^(t)(x_t) - // - pred_original_sample -> f_theta^(t)(x_t) or x_0 - // - std_dev_t -> sigma_t (not the LMS sigma) - // - eta -> eta (set to 0 at the moment) - // - pred_sample_direction -> "direction pointing to - // x_t" - // - pred_prev_sample -> "x_t-1" - int timestep = static_cast(roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps))) - 1; - // 1. get previous step value (=t-1) - int prev_timestep = timestep - TIMESTEPS / static_cast(steps); - // The sigma here is chosen to cause the - // CompVisDenoiser to produce t = timestep - float sigma = static_cast(compvis_sigmas[timestep]); - if (i == 0) { - // The function add_noise intializes x to - // Diffusers' latents * sigma (as in Diffusers' - // pipeline) or sample * sigma (Diffusers' - // scheduler), where this sigma = init_noise_sigma - // in Diffusers. For DDPM and DDIM however, - // init_noise_sigma = 1. But the k-diffusion - // model() also evaluates F_theta(c_in(sigma) x; - // ...) instead of the bare U-net F_theta, with - // c_in = 1 / sqrt(sigma^2 + 1), as defined in - // T. Karras et al., "Elucidating the Design Space - // of Diffusion-Based Generative Models", - // arXiv:2206.00364 [cs.CV], p. 3, Table 1. Hence - // the first call has to be prescaled as x <- x / - // (c_in * sigma) with the k-diffusion pipeline - // and CompVisDenoiser. 
- float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] *= std::sqrt(sigma * sigma + 1) / - sigma; - } - } else { - // For the subsequent steps after the first one, - // at this point x = latents or x = sample, and - // needs to be prescaled with x <- sample / c_in - // to compensate for model() applying the scale - // c_in before the U-net F_theta - float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] *= std::sqrt(sigma * sigma + 1); - } - } - // Note (also noise_pred in Diffuser's pipeline) - // model_output = model() is the D(x, sigma) as - // defined in Karras et al. (2022), p. 3, Table 1 and - // p. 8 (7), compare also p. 38 (226) therein. - ggml_tensor* model_output = - model(x, sigma, i + 1); - // Here model_output is still the k-diffusion denoiser - // output, not the U-net output F_theta(c_in(sigma) x; - // ...) in Karras et al. (2022), whereas Diffusers' - // model_output is F_theta(...). Recover the actual - // model_output, which is also referred to as the - // "Karras ODE derivative" d or d_cur in several - // samplers above. - { - float* vec_x = (float*)x->data; - float* vec_model_output = - (float*)model_output->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_model_output[j] = - (vec_x[j] - vec_model_output[j]) * - (1 / sigma); - } - } - // 2. compute alphas, betas - float alpha_prod_t = static_cast(alphas_cumprod[timestep]); - // Note final_alpha_cumprod = alphas_cumprod[0] due to - // trailing timestep spacing - float alpha_prod_t_prev = static_cast(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); - float beta_prod_t = 1 - alpha_prod_t; - // 3. 
compute predicted original sample from predicted - // noise also called "predicted x_0" of formula (12) - // from https://arxiv.org/pdf/2010.02502.pdf - { - float* vec_x = (float*)x->data; - float* vec_model_output = - (float*)model_output->data; - float* vec_pred_original_sample = - (float*)pred_original_sample->data; - // Note the substitution of latents or sample = x - // * c_in = x / sqrt(sigma^2 + 1) - for (int j = 0; j < ggml_nelements(x); j++) { - vec_pred_original_sample[j] = - (vec_x[j] / std::sqrt(sigma * sigma + 1) - - std::sqrt(beta_prod_t) * - vec_model_output[j]) * - (1 / std::sqrt(alpha_prod_t)); - } - } - // Assuming the "epsilon" prediction type, where below - // pred_epsilon = model_output is inserted, and is not - // defined/copied explicitly. - // - // 5. compute variance: "sigma_t(eta)" -> see formula - // (16) - // - // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) * - // sqrt(1 - alpha_t/alpha_t-1) - float beta_prod_t_prev = 1 - alpha_prod_t_prev; - float variance = (beta_prod_t_prev / beta_prod_t) * - (1 - alpha_prod_t / alpha_prod_t_prev); - float std_dev_t = eta * std::sqrt(variance); - // 6. compute "direction pointing to x_t" of formula - // (12) from https://arxiv.org/pdf/2010.02502.pdf - // 7. 
compute x_t without "random noise" of formula - // (12) from https://arxiv.org/pdf/2010.02502.pdf - { - float* vec_model_output = (float*)model_output->data; - float* vec_pred_original_sample = - (float*)pred_original_sample->data; - float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - // Two step inner loop without an explicit - // tensor - float pred_sample_direction = - ::sqrtf(1 - alpha_prod_t_prev - - ::powf(std_dev_t, 2)) * - vec_model_output[j]; - vec_x[j] = std::sqrt(alpha_prod_t_prev) * - vec_pred_original_sample[j] + - pred_sample_direction; - } - } - if (eta > 0) { - ggml_ext_im_set_randn_f32(variance_noise, rng); - float* vec_variance_noise = - (float*)variance_noise->data; - float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] += std_dev_t * vec_variance_noise[j]; - } - } - // See the note above: x = latents or sample here, and - // is not scaled by the c_in. For the final output - // this is correct, but for subsequent iterations, x - // needs to be prescaled again, since k-diffusion's - // model() differes from the bare U-net F_theta by the - // factor c_in. + return true; +} +// iPNDM_v sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main +static bool ipndm_v_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas) { + size_t steps = sigmas.size() - 1; + int max_order = 4; + std::vector buffer_model; + ggml_tensor* x_next = x; + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + float t_next = sigmas[i + 1]; + + // Denoising step + ggml_tensor* denoised = model(x, sigma, i + 1); + ggml_tensor* d_cur = ggml_dup_tensor(work_ctx, x); + + denoiser_tensor_iter(d_cur, x, denoised, + [sigma](float& d_cur, const float& x, const float& denoised) { + d_cur = (x - denoised) / sigma; + }); + + int order = std::min(max_order, i + 1); + float h_n = t_next - sigma; + float h_n_1 = (i > 0) ? 
(sigma - sigmas[i - 1]) : h_n; + + switch (order) { + case 1: // First Euler step + denoiser_tensor_iter(x, d_cur, + [h_n](float& x, const float& d_cur) { + x += d_cur * h_n; + }); + break; + + case 2: + denoiser_tensor_iter(x, d_cur, buffer_model.back(), + [h_n, h_n_1](float& x, const float& d_cur, const float& d_prev1) { + x += h_n * ((2 + (h_n / h_n_1)) * d_cur - (h_n / h_n_1) * d_prev1) / 2; + }); + break; + + case 3: { + float h_n_2 = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1; + ggml_tensor* d_prev1 = buffer_model.back(); + ggml_tensor* d_prev2 = (buffer_model.size() > 1) ? buffer_model[buffer_model.size() - 2] : d_prev1; + denoiser_tensor_iter(x, d_cur, d_prev1, d_prev2, + [h_n](float& x, const float& d_cur, const float& d_prev1, const float& d_prev2) { + x += h_n * ((23 * d_cur - 16 * d_prev1 + 5 * d_prev2) / 12); // advance by the current step h_n, as before the refactor + }); + break; } - } break; - case TCD_SAMPLE_METHOD: // Strategic Stochastic Sampling (Algorithm 4) in - // Trajectory Consistency Distillation - { - // See J. Zheng et al., "Trajectory Consistency - // Distillation: Improved Latent Consistency Distillation - // by Semi-Linear Consistency Function with Trajectory - // Mapping", arXiv:2402.19159 [cs.CV] - float beta_start = 0.00085f; - float beta_end = 0.0120f; - std::vector alphas_cumprod; - std::vector compvis_sigmas; - - alphas_cumprod.reserve(TIMESTEPS); - compvis_sigmas.reserve(TIMESTEPS); - for (int i = 0; i < TIMESTEPS; i++) { - alphas_cumprod[i] = - (i == 0 ? 1.0f : alphas_cumprod[i - 1]) * - (1.0f - - std::pow(sqrtf(beta_start) + - (sqrtf(beta_end) - sqrtf(beta_start)) * - ((float)i / (TIMESTEPS - 1)), - 2)); - compvis_sigmas[i] = - std::sqrt((1 - alphas_cumprod[i]) / - alphas_cumprod[i]); + + case 4: { + float h_n_2 = (i > 1) ? (sigmas[i - 1] - sigmas[i - 2]) : h_n_1; + float h_n_3 = (i > 2) ? (sigmas[i - 2] - sigmas[i - 3]) : h_n_2; + ggml_tensor* d_prev1 = buffer_model.back(); + ggml_tensor* d_prev2 = (buffer_model.size() > 1) ? 
buffer_model[buffer_model.size() - 2] : d_prev1; + ggml_tensor* d_prev3 = (buffer_model.size() > 2) ? buffer_model[buffer_model.size() - 3] : d_prev2; + denoiser_tensor_iter(x, d_cur, d_prev1, d_prev2, d_prev3, + [h_n](float& x, const float& d_cur, const float& d_prev1, const float& d_prev2, const float& d_prev3) { + x += h_n * ((55 * d_cur - 59 * d_prev1 + 37 * d_prev2 - 9 * d_prev3) / 24); // advance by the current step h_n, as before the refactor + }); + break; } - int original_steps = 50; - - ggml_tensor* pred_original_sample = - ggml_dup_tensor(work_ctx, x); - ggml_tensor* noise = - ggml_dup_tensor(work_ctx, x); - - for (int i = 0; i < steps; i++) { - // Analytic form for TCD timesteps - int timestep = TIMESTEPS - 1 - - (TIMESTEPS / original_steps) * - (int)floor(i * ((float)original_steps / steps)); - // 1. get previous step value - int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps)); - // Here timestep_s is tau_n' in Algorithm 4. The _s - // notation appears to be that from C. Lu, - // "DPM-Solver: A Fast ODE Solver for Diffusion - // Probabilistic Model Sampling in Around 10 Steps", - // arXiv:2206.00927 [cs.LG], but this notation is not - // continued in Algorithm 4, where _n' is used. - int timestep_s = - (int)floor((1 - eta) * prev_timestep); - // Begin k-diffusion specific workaround for - // evaluating F_theta(x; ...) 
from D(x, sigma), same - // as in DDIM (and see there for detailed comments) - float sigma = static_cast(compvis_sigmas[timestep]); - if (i == 0) { - float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] *= std::sqrt(sigma * sigma + 1) / - sigma; - } - } else { - float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] *= std::sqrt(sigma * sigma + 1); - } - } - ggml_tensor* model_output = - model(x, sigma, i + 1); - { - float* vec_x = (float*)x->data; - float* vec_model_output = - (float*)model_output->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_model_output[j] = - (vec_x[j] - vec_model_output[j]) * - (1 / sigma); - } - } - // 2. compute alphas, betas - // - // When comparing TCD with DDPM/DDIM note that Zheng - // et al. (2024) follows the DPM-Solver notation for - // alpha. One can find the following comment in the - // original DPM-Solver code - // (https://github.com/LuChengTHU/dpm-solver/): - // "**Important**: Please pay special attention for - // the args for `alphas_cumprod`: The `alphas_cumprod` - // is the \hat{alpha_n} arrays in the notations of - // DDPM. [...] Therefore, the notation \hat{alpha_n} - // is different from the notation alpha_t in - // DPM-Solver. In fact, we have alpha_{t_n} = - // \sqrt{\hat{alpha_n}}, [...]" - float alpha_prod_t = static_cast(alphas_cumprod[timestep]); - float beta_prod_t = 1 - alpha_prod_t; - // Note final_alpha_cumprod = alphas_cumprod[0] since - // TCD is always "trailing" - float alpha_prod_t_prev = static_cast(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); - // The subscript _s are the only portion in this - // section (2) unique to TCD - float alpha_prod_s = static_cast(alphas_cumprod[timestep_s]); - float beta_prod_s = 1 - alpha_prod_s; - // 3. 
Compute the predicted noised sample x_s based on - // the model parameterization - // - // This section is also exactly the same as DDIM - { - float* vec_x = (float*)x->data; - float* vec_model_output = - (float*)model_output->data; - float* vec_pred_original_sample = - (float*)pred_original_sample->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_pred_original_sample[j] = - (vec_x[j] / std::sqrt(sigma * sigma + 1) - - std::sqrt(beta_prod_t) * - vec_model_output[j]) * - (1 / std::sqrt(alpha_prod_t)); - } - } - // This consistency function step can be difficult to - // decipher from Algorithm 4, as it is simply stated - // using a consistency function. This step is the - // modified DDIM, i.e. p. 8 (32) in Zheng et - // al. (2024), with eta set to 0 (see the paragraph - // immediately thereafter that states this somewhat - // obliquely). - { - float* vec_pred_original_sample = - (float*)pred_original_sample->data; - float* vec_model_output = - (float*)model_output->data; - float* vec_x = (float*)x->data; - for (int j = 0; j < ggml_nelements(x); j++) { - // Substituting x = pred_noised_sample and - // pred_epsilon = model_output - vec_x[j] = - std::sqrt(alpha_prod_s) * - vec_pred_original_sample[j] + - std::sqrt(beta_prod_s) * - vec_model_output[j]; - } - } - // 4. Sample and inject noise z ~ N(0, I) for - // MultiStep Inference Noise is not used on the final - // timestep of the timestep schedule. This also means - // that noise is not used for one-step sampling. Eta - // (referred to as "gamma" in the paper) was - // introduced to control the stochasticity in every - // step. When eta = 0, it represents deterministic - // sampling, whereas eta = 1 indicates full stochastic - // sampling. 
- if (eta > 0 && i != steps - 1) { - // In this case, x is still pred_noised_sample, - // continue in-place - ggml_ext_im_set_randn_f32(noise, rng); - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - for (int j = 0; j < ggml_nelements(x); j++) { - // Corresponding to (35) in Zheng et - // al. (2024), substituting x = - // pred_noised_sample - vec_x[j] = - std::sqrt(alpha_prod_t_prev / - alpha_prod_s) * - vec_x[j] + - std::sqrt(1 - alpha_prod_t_prev / - alpha_prod_s) * - vec_noise[j]; - } - } + } + + // Manage buffer_model + if (buffer_model.size() == max_order - 1) { + buffer_model.erase(buffer_model.begin()); + } + buffer_model.push_back(d_cur); + + // Prepare the next d tensor + d_cur = ggml_dup_tensor(work_ctx, x_next); + } + return true; +} + +// Latent Consistency Models +static bool lcm_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng) { + size_t steps = sigmas.size() - 1; + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + float sigma_to = sigmas[i + 1]; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1); + if (denoised == nullptr) { + return false; + } + + // x = denoised + copy_ggml_tensor(x, denoised); + + if (sigma_to > 0) { + // x += sigmas[i + 1] * noise_sampler(sigmas[i], sigmas[i + 1]) + ggml_ext_im_set_randn_f32(noise, rng); + // noise = load_tensor_from_file(res_ctx, "./rand" + std::to_string(i+1) + ".bin"); + denoiser_tensor_iter(x, noise, [sigma_to](float& x, const float& noise) { + x = x + noise * sigma_to; + }); + } + } + return true; +} + +// Denoising Diffusion Implicit Models +// with the "trailing" timestep spacing +static bool ddim_trailing_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + // See J. 
Song et al., "Denoising Diffusion Implicit + // Models", arXiv:2010.02502 [cs.LG] + // + // DDIM itself needs alphas_cumprod (DDPM, J. Ho et al., + // arXiv:2006.11239 [cs.LG] with k-diffusion's start and + // end beta) (which unfortunately k-diffusion's data + // structure hides from the denoiser), and the sigmas are + // also needed to invert the behavior of CompVisDenoiser + // (k-diffusion's LMSDiscreteScheduler) + float beta_start = 0.00085f; + float beta_end = 0.0120f; + std::vector alphas_cumprod; + std::vector compvis_sigmas; + + alphas_cumprod.resize(TIMESTEPS); + compvis_sigmas.resize(TIMESTEPS); // resize, not reserve: the loop below writes elements through operator[] + for (int i = 0; i < TIMESTEPS; i++) { + alphas_cumprod[i] = + (i == 0 ? 1.0f : alphas_cumprod[i - 1]) * + (1.0f - + std::pow(sqrtf(beta_start) + + (sqrtf(beta_end) - sqrtf(beta_start)) * + ((float)i / (TIMESTEPS - 1)), + 2)); + compvis_sigmas[i] = + std::sqrt((1 - alphas_cumprod[i]) / + alphas_cumprod[i]); + } + + ggml_tensor* pred_original_sample = + ggml_dup_tensor(work_ctx, x); + ggml_tensor* variance_noise = + ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // The "trailing" DDIM timestep, see S. Lin et al., + // "Common Diffusion Noise Schedulers and Sample Steps + // are Flawed", arXiv:2305.08891 [cs], p. 4, Table + // 2. Most variables below follow Diffusers naming + // + // Diffuser naming vs. Song et al. (2020), p. 5, (12) + // and p. 16, (16) ( -> ): + // + // - pred_noise_t -> epsilon_theta^(t)(x_t) + // - pred_original_sample -> f_theta^(t)(x_t) or x_0 + // - std_dev_t -> sigma_t (not the LMS sigma) + // - eta -> eta (set to 0 at the moment) + // - pred_sample_direction -> "direction pointing to + // x_t" + // - pred_prev_sample -> "x_t-1" + int timestep = static_cast(roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps))) - 1; + // 1. 
get previous step value (=t-1) + int prev_timestep = timestep - TIMESTEPS / static_cast(steps); + // The sigma here is chosen to cause the + // CompVisDenoiser to produce t = timestep + float sigma = static_cast(compvis_sigmas[timestep]); + if (i == 0) { + // The function add_noise intializes x to + // Diffusers' latents * sigma (as in Diffusers' + // pipeline) or sample * sigma (Diffusers' + // scheduler), where this sigma = init_noise_sigma + // in Diffusers. For DDPM and DDIM however, + // init_noise_sigma = 1. But the k-diffusion + // model() also evaluates F_theta(c_in(sigma) x; + // ...) instead of the bare U-net F_theta, with + // c_in = 1 / sqrt(sigma^2 + 1), as defined in + // T. Karras et al., "Elucidating the Design Space + // of Diffusion-Based Generative Models", + // arXiv:2206.00364 [cs.CV], p. 3, Table 1. Hence + // the first call has to be prescaled as x <- x / + // (c_in * sigma) with the k-diffusion pipeline + // and CompVisDenoiser. + denoiser_tensor_iter(x, [sigma](float& x) { + x *= std::sqrt(sigma * sigma + 1) / sigma; + }); + } else { + // For the subsequent steps after the first one, + // at this point x = latents or x = sample, and + // needs to be prescaled with x <- sample / c_in + // to compensate for model() applying the scale + // c_in before the U-net F_theta + denoiser_tensor_iter(x, [sigma](float& x) { + x *= std::sqrt(sigma * sigma + 1); + }); + } + // Note (also noise_pred in Diffuser's pipeline) + // model_output = model() is the D(x, sigma) as + // defined in Karras et al. (2022), p. 3, Table 1 and + // p. 8 (7), compare also p. 38 (226) therein. + ggml_tensor* model_output = + model(x, sigma, i + 1); + // Here model_output is still the k-diffusion denoiser + // output, not the U-net output F_theta(c_in(sigma) x; + // ...) in Karras et al. (2022), whereas Diffusers' + // model_output is F_theta(...). 
Recover the actual + // model_output, which is also referred to as the + // "Karras ODE derivative" d or d_cur in several + // samplers above. + denoiser_tensor_iter(model_output, x, [sigma](float& model_output, const float& x) { + model_output = (x - model_output) * (1 / sigma); + }); + // 2. compute alphas, betas + float alpha_prod_t = static_cast(alphas_cumprod[timestep]); + // Note final_alpha_cumprod = alphas_cumprod[0] due to + // trailing timestep spacing + float alpha_prod_t_prev = static_cast(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); + float beta_prod_t = 1 - alpha_prod_t; + // 3. compute predicted original sample from predicted + // noise also called "predicted x_0" of formula (12) + // from https://arxiv.org/pdf/2010.02502.pdf + denoiser_tensor_iter(pred_original_sample, model_output, x, + [sigma, beta_prod_t, alpha_prod_t](float& pred_original_sample, const float& model_output, const float& x) { + // Note the substitution of latents or sample = x + // * c_in = x / sqrt(sigma^2 + 1) + pred_original_sample = + (x / std::sqrt(sigma * sigma + 1) - + std::sqrt(beta_prod_t) * + model_output) * + (1 / std::sqrt(alpha_prod_t)); + }); + // Assuming the "epsilon" prediction type, where below + // pred_epsilon = model_output is inserted, and is not + // defined/copied explicitly. + // + // 5. compute variance: "sigma_t(eta)" -> see formula + // (16) + // + // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) * + // sqrt(1 - alpha_t/alpha_t-1) + float beta_prod_t_prev = 1 - alpha_prod_t_prev; + float variance = (beta_prod_t_prev / beta_prod_t) * + (1 - alpha_prod_t / alpha_prod_t_prev); + float std_dev_t = eta * std::sqrt(variance); + // 6. compute "direction pointing to x_t" of formula + // (12) from https://arxiv.org/pdf/2010.02502.pdf + // 7. 
compute x_t without "random noise" of formula + // (12) from https://arxiv.org/pdf/2010.02502.pdf + denoiser_tensor_iter(x, model_output, pred_original_sample, + [alpha_prod_t_prev, std_dev_t](float& x, const float& model_output, const float& pred_original_sample) { + // Two step inner loop without an explicit tensor + float pred_sample_direction = + ::sqrtf(1 - alpha_prod_t_prev - + ::powf(std_dev_t, 2)) * + model_output; + x = std::sqrt(alpha_prod_t_prev) * + pred_original_sample + + pred_sample_direction; + }); + if (eta > 0) { + ggml_ext_im_set_randn_f32(variance_noise, rng); + denoiser_tensor_iter(x, variance_noise, [std_dev_t](float& x, const float& variance_noise) { + x = x + variance_noise * std_dev_t; + }); + } + // See the note above: x = latents or sample here, and + // is not scaled by the c_in. For the final output + // this is correct, but for subsequent iterations, x + // needs to be prescaled again, since k-diffusion's + // model() differs from the bare U-net F_theta by the + // factor c_in. + } + return true; +} + +// Strategic Stochastic Sampling (Algorithm 4) in Trajectory Consistency Distillation +static bool tcd_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + + // See J. Zheng et al., "Trajectory Consistency + // Distillation: Improved Latent Consistency Distillation + // by Semi-Linear Consistency Function with Trajectory + // Mapping", arXiv:2402.19159 [cs.CV] + float beta_start = 0.00085f; + float beta_end = 0.0120f; + std::vector alphas_cumprod; + std::vector compvis_sigmas; + + alphas_cumprod.resize(TIMESTEPS); + compvis_sigmas.resize(TIMESTEPS); // resize, not reserve: the loop below writes elements through operator[] + for (int i = 0; i < TIMESTEPS; i++) { + alphas_cumprod[i] = + (i == 0 ? 
1.0f : alphas_cumprod[i - 1]) * + (1.0f - + std::pow(sqrtf(beta_start) + + (sqrtf(beta_end) - sqrtf(beta_start)) * + ((float)i / (TIMESTEPS - 1)), + 2)); + compvis_sigmas[i] = + std::sqrt((1 - alphas_cumprod[i]) / + alphas_cumprod[i]); + } + int original_steps = 50; + + ggml_tensor* pred_original_sample = + ggml_dup_tensor(work_ctx, x); + ggml_tensor* noise = + ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + // Analytic form for TCD timesteps + int timestep = TIMESTEPS - 1 - + (TIMESTEPS / original_steps) * + (int)floor(i * ((float)original_steps / steps)); + // 1. get previous step value + int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps)); + // Here timestep_s is tau_n' in Algorithm 4. The _s + // notation appears to be that from C. Lu, + // "DPM-Solver: A Fast ODE Solver for Diffusion + // Probabilistic Model Sampling in Around 10 Steps", + // arXiv:2206.00927 [cs.LG], but this notation is not + // continued in Algorithm 4, where _n' is used. + int timestep_s = + (int)floor((1 - eta) * prev_timestep); + // Begin k-diffusion specific workaround for + // evaluating F_theta(x; ...) from D(x, sigma), same + // as in DDIM (and see there for detailed comments) + float sigma = static_cast(compvis_sigmas[timestep]); + if (i == 0) { + denoiser_tensor_iter(x, [sigma](float& x) { + x *= std::sqrt(sigma * sigma + 1) / sigma; + }); + } else { + denoiser_tensor_iter(x, [sigma](float& x) { + x *= std::sqrt(sigma * sigma + 1); + }); + } + ggml_tensor* model_output = model(x, sigma, i + 1); + denoiser_tensor_iter(model_output, x, [sigma](float& model_output, const float& x) { + model_output = (x - model_output) * (1 / sigma); + }); + // 2. compute alphas, betas + // + // When comparing TCD with DDPM/DDIM note that Zheng + // et al. (2024) follows the DPM-Solver notation for + // alpha. 
One can find the following comment in the + // original DPM-Solver code + // (https://github.com/LuChengTHU/dpm-solver/): + // "**Important**: Please pay special attention for + // the args for `alphas_cumprod`: The `alphas_cumprod` + // is the \hat{alpha_n} arrays in the notations of + // DDPM. [...] Therefore, the notation \hat{alpha_n} + // is different from the notation alpha_t in + // DPM-Solver. In fact, we have alpha_{t_n} = + // \sqrt{\hat{alpha_n}}, [...]" + float alpha_prod_t = static_cast(alphas_cumprod[timestep]); + float beta_prod_t = 1 - alpha_prod_t; + // Note final_alpha_cumprod = alphas_cumprod[0] since + // TCD is always "trailing" + float alpha_prod_t_prev = static_cast(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); + // The subscript _s are the only portion in this + // section (2) unique to TCD + float alpha_prod_s = static_cast(alphas_cumprod[timestep_s]); + float beta_prod_s = 1 - alpha_prod_s; + // 3. Compute the predicted noised sample x_s based on + // the model parameterization + // + // This section is also exactly the same as DDIM + denoiser_tensor_iter(pred_original_sample, model_output, x, + [sigma, beta_prod_t, alpha_prod_t](float& pred_original_sample, const float& model_output, const float& x) { + // Note the substitution of latents or sample = x + // * c_in = x / sqrt(sigma^2 + 1) + pred_original_sample = + (x / std::sqrt(sigma * sigma + 1) - + std::sqrt(beta_prod_t) * + model_output) * + (1 / std::sqrt(alpha_prod_t)); + }); + // This consistency function step can be difficult to + // decipher from Algorithm 4, as it is simply stated + // using a consistency function. This step is the + // modified DDIM, i.e. p. 8 (32) in Zheng et + // al. (2024), with eta set to 0 (see the paragraph + // immediately thereafter that states this somewhat + // obliquely). 
+ denoiser_tensor_iter(x, model_output, pred_original_sample, + [alpha_prod_s, beta_prod_s](float& x, const float& model_output, const float& pred_original_sample) { + // Substituting x = pred_noised_sample and + // pred_epsilon = model_output + x = std::sqrt(alpha_prod_s) * pred_original_sample + + std::sqrt(beta_prod_s) * model_output; + }); + // 4. Sample and inject noise z ~ N(0, I) for + // MultiStep Inference Noise is not used on the final + // timestep of the timestep schedule. This also means + // that noise is not used for one-step sampling. Eta + // (referred to as "gamma" in the paper) was + // introduced to control the stochasticity in every + // step. When eta = 0, it represents deterministic + // sampling, whereas eta = 1 indicates full stochastic + // sampling. + if (eta > 0 && i != steps - 1) { + // In this case, x is still pred_noised_sample, + // continue in-place + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [alpha_prod_t_prev, alpha_prod_s](float& x, float& noise) { + // Corresponding to (35) in Zheng et + // al. 
(2024), substituting x = + // pred_noised_sample + x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x + + std::sqrt(1 - alpha_prod_t_prev / alpha_prod_s) * noise; + }); + } + } + return true; +} + +// Res Multistep sampler +static bool res_multistep_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); + + bool have_old_sigma = false; + float old_sigma_down = 0.0f; + + auto t_fn = [](float sigma) -> float { return -logf(sigma); }; + auto sigma_fn = [](float t) -> float { return expf(-t); }; + auto phi1_fn = [](float t) -> float { + if (fabsf(t) < 1e-6f) { + return 1.0f + t * 0.5f + (t * t) / 6.0f; + } + return (expf(t) - 1.0f) / t; + }; + auto phi2_fn = [&](float t) -> float { + if (fabsf(t) < 1e-6f) { + return 0.5f + t / 6.0f + (t * t) / 24.0f; + } + float phi1_val = phi1_fn(t); + return (phi1_val - 1.0f) / t; + }; + + for (int i = 0; i < steps; i++) { + ggml_tensor* denoised = model(x, sigmas[i], i + 1); + if (denoised == nullptr) { + return false; + } + + float sigma_from = sigmas[i]; + float sigma_to = sigmas[i + 1]; + float sigma_up, sigma_down; + generate_ancestral_step(sigma_up, sigma_down, sigma_from, sigma_to, eta); + + if (sigma_down == 0.0f || !have_old_sigma) { + float dt = sigma_down - sigma_from; + denoiser_tensor_iter(x, denoised, [sigma_from, dt](float& x, const float& denoised) { + float d = (x - denoised) / sigma_from; + x = x + d * dt; + }); + } else { + float t = t_fn(sigma_from); + float t_old = t_fn(old_sigma_down); + float t_next = t_fn(sigma_down); + float t_prev = t_fn(sigmas[i - 1]); + float h = t_next - t; + float c2 = (t_prev - t_old) / h; + + float phi1_val = phi1_fn(-h); + float phi2_val = phi2_fn(-h); + float b1 = phi1_val - phi2_val / c2; + float b2 = phi2_val / c2; + + if (!std::isfinite(b1)) { + b1 = 0.0f; } - } 
break; - case RES_MULTISTEP_SAMPLE_METHOD: // Res Multistep sampler - { - ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); - - bool have_old_sigma = false; - float old_sigma_down = 0.0f; - - auto t_fn = [](float sigma) -> float { return -logf(sigma); }; - auto sigma_fn = [](float t) -> float { return expf(-t); }; - auto phi1_fn = [](float t) -> float { - if (fabsf(t) < 1e-6f) { - return 1.0f + t * 0.5f + (t * t) / 6.0f; - } - return (expf(t) - 1.0f) / t; - }; - auto phi2_fn = [&](float t) -> float { - if (fabsf(t) < 1e-6f) { - return 0.5f + t / 6.0f + (t * t) / 24.0f; - } - float phi1_val = phi1_fn(t); - return (phi1_val - 1.0f) / t; - }; - - for (int i = 0; i < steps; i++) { - ggml_tensor* denoised = model(x, sigmas[i], i + 1); - if (denoised == nullptr) { - return false; - } - - float sigma_from = sigmas[i]; - float sigma_to = sigmas[i + 1]; - float sigma_up = 0.0f; - float sigma_down = sigma_to; - - if (eta > 0.0f) { - float sigma_from_sq = sigma_from * sigma_from; - float sigma_to_sq = sigma_to * sigma_to; - if (sigma_from_sq > 0.0f) { - float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq; - if (term > 0.0f) { - sigma_up = eta * std::sqrt(term); - } - } - sigma_up = std::min(sigma_up, sigma_to); - float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up; - sigma_down = sigma_down_sq > 0.0f ? 
std::sqrt(sigma_down_sq) : 0.0f; - } - - if (sigma_down == 0.0f || !have_old_sigma) { - float dt = sigma_down - sigma_from; - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - float d = (vec_x[j] - vec_denoised[j]) / sigma_from; - vec_x[j] = vec_x[j] + d * dt; - } - } else { - float t = t_fn(sigma_from); - float t_old = t_fn(old_sigma_down); - float t_next = t_fn(sigma_down); - float t_prev = t_fn(sigmas[i - 1]); - float h = t_next - t; - float c2 = (t_prev - t_old) / h; - - float phi1_val = phi1_fn(-h); - float phi2_val = phi2_fn(-h); - float b1 = phi1_val - phi2_val / c2; - float b2 = phi2_val / c2; - - if (!std::isfinite(b1)) { - b1 = 0.0f; - } - if (!std::isfinite(b2)) { - b2 = 0.0f; - } - - float sigma_h = sigma_fn(h); - float* vec_x = (float*)x->data; - float* vec_denoised = (float*)denoised->data; - float* vec_old_denoised = (float*)old_denoised->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = sigma_h * vec_x[j] + h * (b1 * vec_denoised[j] + b2 * vec_old_denoised[j]); - } - } - - if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { - ggml_ext_im_set_randn_f32(noise, rng); - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up; - } - } - - float* vec_old_denoised = (float*)old_denoised->data; - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_old_denoised[j] = vec_denoised[j]; - } - - old_sigma_down = sigma_down; - have_old_sigma = true; + if (!std::isfinite(b2)) { + b2 = 0.0f; } - } break; - case RES_2S_SAMPLE_METHOD: // Res 2s sampler - { - ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); - ggml_tensor* x0 = ggml_dup_tensor(work_ctx, x); - ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); - - const float c2 = 0.5f; - auto t_fn = [](float sigma) -> float { return -logf(sigma); }; - auto phi1_fn = 
[](float t) -> float { - if (fabsf(t) < 1e-6f) { - return 1.0f + t * 0.5f + (t * t) / 6.0f; - } - return (expf(t) - 1.0f) / t; - }; - auto phi2_fn = [&](float t) -> float { - if (fabsf(t) < 1e-6f) { - return 0.5f + t / 6.0f + (t * t) / 24.0f; - } - float phi1_val = phi1_fn(t); - return (phi1_val - 1.0f) / t; - }; - - for (int i = 0; i < steps; i++) { - float sigma_from = sigmas[i]; - float sigma_to = sigmas[i + 1]; - - ggml_tensor* denoised = model(x, sigma_from, -(i + 1)); - if (denoised == nullptr) { - return false; - } - - float sigma_up = 0.0f; - float sigma_down = sigma_to; - if (eta > 0.0f) { - float sigma_from_sq = sigma_from * sigma_from; - float sigma_to_sq = sigma_to * sigma_to; - if (sigma_from_sq > 0.0f) { - float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq; - if (term > 0.0f) { - sigma_up = eta * std::sqrt(term); - } - } - sigma_up = std::min(sigma_up, sigma_to); - float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up; - sigma_down = sigma_down_sq > 0.0f ? 
std::sqrt(sigma_down_sq) : 0.0f; - } - - float* vec_x = (float*)x->data; - float* vec_x0 = (float*)x0->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x0[j] = vec_x[j]; - } - - if (sigma_down == 0.0f || sigma_from == 0.0f) { - float* vec_denoised = (float*)denoised->data; - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_denoised[j]; - } - } else { - float t = t_fn(sigma_from); - float t_next = t_fn(sigma_down); - float h = t_next - t; - - float a21 = c2 * phi1_fn(-h * c2); - float phi1_val = phi1_fn(-h); - float phi2_val = phi2_fn(-h); - float b2 = phi2_val / c2; - float b1 = phi1_val - b2; - - float sigma_c2 = expf(-(t + h * c2)); - - float* vec_denoised = (float*)denoised->data; - float* vec_x2 = (float*)x2->data; - for (int j = 0; j < ggml_nelements(x); j++) { - float eps1 = vec_denoised[j] - vec_x0[j]; - vec_x2[j] = vec_x0[j] + h * a21 * eps1; - } - - ggml_tensor* denoised2 = model(x2, sigma_c2, i + 1); - if (denoised2 == nullptr) { - return false; - } - float* vec_denoised2 = (float*)denoised2->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - float eps1 = vec_denoised[j] - vec_x0[j]; - float eps2 = vec_denoised2[j] - vec_x0[j]; - vec_x[j] = vec_x0[j] + h * (b1 * eps1 + b2 * eps2); - } - } - - if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { - ggml_ext_im_set_randn_f32(noise, rng); - float* vec_x = (float*)x->data; - float* vec_noise = (float*)noise->data; - - for (int j = 0; j < ggml_nelements(x); j++) { - vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up; - } - } + + float sigma_h = sigma_fn(h); + denoiser_tensor_iter(x, denoised, old_denoised, + [sigma_h, h, b1, b2](float& x, const float& denoised, const float& old_denoised) { + x = sigma_h * x + h * (b1 * denoised + b2 * old_denoised); + }); + } + + if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [sigma_up](float& x, const float& noise) { + x = x + noise * sigma_up; + }); + } + + copy_ggml_tensor(old_denoised, 
denoised); + old_sigma_down = sigma_down; + have_old_sigma = true; + } + return true; +} + +// Res 2s sampler +static bool res_2s_sample_method(denoise_cb_t model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + size_t steps = sigmas.size() - 1; + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* x0 = ggml_dup_tensor(work_ctx, x); + ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); + + const float c2 = 0.5f; + auto t_fn = [](float sigma) -> float { return -logf(sigma); }; + auto phi1_fn = [](float t) -> float { + if (fabsf(t) < 1e-6f) { + return 1.0f + t * 0.5f + (t * t) / 6.0f; + } + return (expf(t) - 1.0f) / t; + }; + auto phi2_fn = [&](float t) -> float { + if (fabsf(t) < 1e-6f) { + return 0.5f + t / 6.0f + (t * t) / 24.0f; + } + float phi1_val = phi1_fn(t); + return (phi1_val - 1.0f) / t; + }; + + for (int i = 0; i < steps; i++) { + float sigma_from = sigmas[i]; + float sigma_to = sigmas[i + 1]; + + ggml_tensor* denoised = model(x, sigma_from, -(i + 1)); + if (denoised == nullptr) { + return false; + } + + float sigma_up, sigma_down; + generate_ancestral_step(sigma_up, sigma_down, sigma_from, sigma_to, eta); + + copy_ggml_tensor(x0, x); + + if (sigma_down == 0.0f || sigma_from == 0.0f) { + copy_ggml_tensor(x, denoised); + } else { + float t = t_fn(sigma_from); + float t_next = t_fn(sigma_down); + float h = t_next - t; + + float a21 = c2 * phi1_fn(-h * c2); + float phi1_val = phi1_fn(-h); + float phi2_val = phi2_fn(-h); + float b2 = phi2_val / c2; + float b1 = phi1_val - b2; + + float sigma_c2 = expf(-(t + h * c2)); + + denoiser_tensor_iter(x2, denoised, x0, + [h, a21](float& x2, const float& denoised, const float& x0) { + float eps1 = denoised - x0; + x2 = x0 + h * a21 * eps1; + }); + + ggml_tensor* denoised2 = model(x2, sigma_c2, i + 1); + if (denoised2 == nullptr) { + return false; } - } break; + denoiser_tensor_iter(x, denoised, denoised2, x0, + [h, b1, b2](float& x, const float& denoised, const 
float& denoised2, const float& x0) { + float eps1 = denoised - x0; + float eps2 = denoised2 - x0; + x = x0 + h * (b1 * eps1 + b2 * eps2); + }); + } + + if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { + ggml_ext_im_set_randn_f32(noise, rng); + denoiser_tensor_iter(x, noise, [sigma_up](float& x, const float& noise) { + x = x + noise * sigma_up; + }); + } + } + + return true; +} + +// k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t +static bool sample_k_diffusion(sample_method_t method, + denoise_cb_t model, + ggml_context* work_ctx, + ggml_tensor* x, + std::vector sigmas, + std::shared_ptr rng, + float eta, + float flow_denoiser) { + switch (method) { + case EULER_A_SAMPLE_METHOD: + if (flow_denoiser) + return euler_a_rf_sample_method(model, work_ctx, x, sigmas, rng, eta); + else + return euler_a_sample_method(model, work_ctx, x, sigmas, rng, eta); + case EULER_SAMPLE_METHOD: + return euler_sample_method(model, work_ctx, x, sigmas); + case HEUN_SAMPLE_METHOD: + return heun_sample_method(model, work_ctx, x, sigmas); + case DPM2_SAMPLE_METHOD: + return dpm2_sample_method(model, work_ctx, x, sigmas); + case DPMPP2S_A_SAMPLE_METHOD: + if (flow_denoiser) + return dpmpp2s_a_rf_sample_method(model, work_ctx, x, sigmas, rng, eta); + else + return dpmpp2s_a_sample_method(model, work_ctx, x, sigmas, rng, eta); + case DPMPP2M_SAMPLE_METHOD: + return dpmpp2m_sample_method(model, work_ctx, x, sigmas); + case DPMPP2Mv2_SAMPLE_METHOD: + return dpmpp2mv2_sample_method(model, work_ctx, x, sigmas); + case IPNDM_SAMPLE_METHOD: + return ipndm_sample_method(model, work_ctx, x, sigmas); + case IPNDM_V_SAMPLE_METHOD: + return ipndm_v_sample_method(model, work_ctx, x, sigmas); + case LCM_SAMPLE_METHOD: + return lcm_sample_method(model, work_ctx, x, sigmas, rng); + case DDIM_TRAILING_SAMPLE_METHOD: + return ddim_trailing_sample_method(model, work_ctx, x, sigmas, rng, eta); + case TCD_SAMPLE_METHOD: + return tcd_sample_method(model, work_ctx, x, sigmas, rng, eta); + 
case RES_MULTISTEP_SAMPLE_METHOD: + return res_multistep_sample_method(model, work_ctx, x, sigmas, rng, eta); + case RES_2S_SAMPLE_METHOD: + return res_2s_sample_method(model, work_ctx, x, sigmas, rng, eta); default: LOG_ERROR("Attempting to sample with nonexisting sample method %i", method); return false; } - return true; } - #endif // __DENOISER_HPP__ diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index bbf2f979d..6af608fe3 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -2000,6 +2000,7 @@ class StableDiffusionGGML { float eta, int shifted_timestep, sample_method_t method, + bool is_flow_denoiser, const std::vector& sigmas, int start_merge_step, SDCondition id_cond, @@ -2347,7 +2348,7 @@ class StableDiffusionGGML { return denoised; }; - if (!sample_k_diffusion(method, denoise, work_ctx, x, sigmas, sampler_rng, eta)) { + if (!sample_k_diffusion(method, denoise, work_ctx, x, sigmas, sampler_rng, eta, is_flow_denoiser)) { LOG_ERROR("Diffusion model sampling failed"); if (control_net) { control_net->free_control_ctx(); @@ -2461,6 +2462,12 @@ class StableDiffusionGGML { flow_denoiser->set_shift(flow_shift); } } + + bool is_flow_denoiser() { + auto flow_denoiser = std::dynamic_pointer_cast(denoiser); + return !!flow_denoiser; + } + }; /*================================================= SD API ==================================================*/ @@ -2777,6 +2784,7 @@ void sd_sample_params_init(sd_sample_params_t* sample_params) { sample_params->scheduler = SCHEDULER_COUNT; sample_params->sample_method = SAMPLE_METHOD_COUNT; sample_params->sample_steps = 20; + sample_params->eta = INFINITY; sample_params->custom_sigmas = nullptr; sample_params->custom_sigmas_count = 0; sample_params->flow_shift = INFINITY; @@ -2953,6 +2961,21 @@ enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx) { return EULER_A_SAMPLE_METHOD; } +static float sd_get_default_eta(enum sample_method_t sample_method) { + switch(sample_method) { 
+ case DDIM_TRAILING_SAMPLE_METHOD: + case TCD_SAMPLE_METHOD: + case RES_MULTISTEP_SAMPLE_METHOD: + case RES_2S_SAMPLE_METHOD: + return 0.0f; + case EULER_A_SAMPLE_METHOD: + case DPMPP2S_A_SAMPLE_METHOD: + return 1.0f; + default: + return INFINITY; + } +} + enum scheduler_t sd_get_default_scheduler(const sd_ctx_t* sd_ctx, enum sample_method_t sample_method) { if (sd_ctx != nullptr && sd_ctx->sd != nullptr) { auto edm_v_denoiser = std::dynamic_pointer_cast(sd_ctx->sd->denoiser); @@ -2978,6 +3001,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, int width, int height, enum sample_method_t sample_method, + bool is_flow_denoiser, const std::vector& sigmas, int64_t seed, int batch_count, @@ -3178,6 +3202,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, eta, shifted_timestep, sample_method, + is_flow_denoiser, sigmas, start_merge_step, id_cond, @@ -3331,7 +3356,17 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g if (sample_method == SAMPLE_METHOD_COUNT) { sample_method = sd_get_default_sample_method(sd_ctx); } - LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); + float eta = sd_img_gen_params->sample_params.eta; + float default_eta = sd_get_default_eta(sample_method); + if (default_eta != INFINITY) { + if (eta == INFINITY) { + eta = default_eta; + } + LOG_INFO("sampling using %s method (eta %g)", sampling_methods_str[sample_method], eta); + } else { + LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); + } + bool is_flow_denoiser = sd_ctx->sd->is_flow_denoiser(); int sample_steps = sd_img_gen_params->sample_params.sample_steps; std::vector sigmas; @@ -3546,11 +3581,12 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g SAFE_STR(sd_img_gen_params->negative_prompt), sd_img_gen_params->clip_skip, guidance, - sd_img_gen_params->sample_params.eta, + eta, sd_img_gen_params->sample_params.shifted_timestep, width, height, sample_method, + 
is_flow_denoiser, sigmas, seed, sd_img_gen_params->batch_count, @@ -3610,6 +3646,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s sample_method = sd_get_default_sample_method(sd_ctx); } LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); + bool is_flow_denoiser = sd_ctx->sd->is_flow_denoiser(); int high_noise_sample_steps = 0; if (sd_ctx->sd->high_noise_diffusion_model) { @@ -3930,6 +3967,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s sd_vid_gen_params->high_noise_sample_params.eta, sd_vid_gen_params->high_noise_sample_params.shifted_timestep, high_noise_sample_method, + is_flow_denoiser, high_noise_sigmas, -1, {}, @@ -3967,6 +4005,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s sd_vid_gen_params->sample_params.eta, sd_vid_gen_params->sample_params.shifted_timestep, sample_method, + is_flow_denoiser, sigmas, -1, {},