diff --git a/examples/cli/README.md b/examples/cli/README.md index 904f3c441..bb2dc9e1e 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -127,10 +127,10 @@ Generation Options: --disable-auto-resize-ref-image disable auto resize of ref images -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, - tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a + tcd, res_multistep, res_2s, euler_cfg_pp, euler_a_cfg_pp] (default: euler for Flux/SD3/Wan, euler_a otherwise) --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, - ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, + ddim_trailing, tcd, res_multistep, res_2s, euler_cfg_pp, euler_a_cfg_pp] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete diff --git a/examples/common/common.hpp b/examples/common/common.hpp index 9389b03a3..14be0a962 100644 --- a/examples/common/common.hpp +++ b/examples/common/common.hpp @@ -1467,7 +1467,7 @@ struct SDGenerationParams { on_seed_arg}, {"", "--sampling-method", - "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] " + "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, euler_cfg_pp, euler_a_cfg_pp] " "(default: euler for Flux/SD3/Wan, euler_a otherwise)", on_sample_method_arg}, {"", diff --git a/examples/server/README.md b/examples/server/README.md index 8aa2158f5..b47915331 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -207,10 +207,10 @@ Default Generation Options: --disable-auto-resize-ref-image disable auto resize of ref images -s, --seed RNG seed (default: 42, use random seed for < 0) --sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, - tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a + tcd, res_multistep, res_2s, euler_cfg_pp, euler_a_cfg_pp] (default: euler for Flux/SD3/Wan, euler_a otherwise) --high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, - ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, + ddim_trailing, tcd, res_multistep, res_2s, euler_cfg_pp, euler_a_cfg_pp] default: euler for Flux/SD3/Wan, euler_a otherwise --scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete diff --git a/examples/server/main.cpp b/examples/server/main.cpp index 6e4340a61..142c6463b 100644 --- a/examples/server/main.cpp +++ b/examples/server/main.cpp @@ -931,7 +931,11 @@ int main(int argc, const char** argv) { {"res multistep", RES_MULTISTEP_SAMPLE_METHOD}, {"k_res_multistep", RES_MULTISTEP_SAMPLE_METHOD}, {"res 2s", RES_2S_SAMPLE_METHOD}, - {"k_res_2s", RES_2S_SAMPLE_METHOD}}; + {"k_res_2s", RES_2S_SAMPLE_METHOD}, + {"euler_cfg_pp", EULER_CFG_PP_SAMPLE_METHOD}, + {"keuler_cfg_pp", EULER_CFG_PP_SAMPLE_METHOD}, + {"euler_a_cfg_pp", EULER_A_CFG_PP_SAMPLE_METHOD}, + {"keuler_a_cfg_pp", EULER_A_CFG_PP_SAMPLE_METHOD}}; auto it = hardcoded.find(name); if (it != hardcoded.end()) return it->second; return SAMPLE_METHOD_COUNT; diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h index 029c2ab1d..fe3646fd0 100644 --- a/include/stable-diffusion.h +++ b/include/stable-diffusion.h @@ -50,6 +50,8 @@ enum sample_method_t { TCD_SAMPLE_METHOD, RES_MULTISTEP_SAMPLE_METHOD, RES_2S_SAMPLE_METHOD, + EULER_CFG_PP_SAMPLE_METHOD, + EULER_A_CFG_PP_SAMPLE_METHOD, SAMPLE_METHOD_COUNT }; diff --git a/src/denoiser.hpp b/src/denoiser.hpp index b92ca4e3f..a6d810ba3 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -759,16 +759,21 @@ struct Flux2FlowDenoiser : public FluxFlowDenoiser { } }; -typedef std::function denoise_cb_t; +typedef std::function denoise_cb_t; // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t static bool sample_k_diffusion(sample_method_t method, - denoise_cb_t model, + denoise_cb_t raw_model, ggml_context* work_ctx, ggml_tensor* x, std::vector sigmas, std::shared_ptr rng, float eta) { + + auto model = [&](ggml_tensor* x, float sigma, int step, ggml_tensor** uncond = nullptr) { + return raw_model(x, sigma, step, uncond); + }; + size_t steps = sigmas.size() - 1; // sample_euler_ancestral switch (method) { @@ -1897,6 +1902,102 @@ static bool sample_k_diffusion(sample_method_t method, } } } break; + case EULER_CFG_PP_SAMPLE_METHOD: // Euler CFG++ sampler from https://cfgpp-diffusion.github.io/ + { + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + ggml_tensor* uncond_denoised = nullptr; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1, &uncond_denoised); + if (denoised == nullptr || uncond_denoised == nullptr) { + return false; + } + + // d = (x - uncond_denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_uncond = (float*)uncond_denoised->data; + + for (int j = 0; j < ggml_nelements(d); j++) { + vec_d[j] = (vec_x[j] - vec_uncond[j]) / sigma; + } + } + + // Euler method (CFG++) + // x = denoised + d * sigmas[i + 1] + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_denoised[j] + vec_d[j] * sigmas[i + 1]; + } + } + } + } break; + case EULER_A_CFG_PP_SAMPLE_METHOD: // Euler ancestral CFG++ sampler from https://cfgpp-diffusion.github.io/ + { + ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); + ggml_tensor* d = ggml_dup_tensor(work_ctx, x); + + for (int i = 0; i < steps; i++) { + float sigma = sigmas[i]; + ggml_tensor* uncond_denoised = nullptr; + + // denoise + ggml_tensor* denoised = model(x, sigma, i + 1, &uncond_denoised); + if (denoised == nullptr || uncond_denoised == nullptr) { + return false; + } + + // d = (x - uncond_denoised) / sigma + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_uncond = (float*)uncond_denoised->data; + + for (int j = 0; j < ggml_nelements(d); j++) { + vec_d[j] = (vec_x[j] - vec_uncond[j]) / sigma; + } + } + + // get_ancestral_step + float sigma_up = std::min(sigmas[i + 1], + std::sqrt(sigmas[i + 1] * sigmas[i + 1] * (sigmas[i] * sigmas[i] - sigmas[i + 1] * sigmas[i + 1]) / (sigmas[i] * sigmas[i]))); + float sigma_down = std::sqrt(sigmas[i + 1] * sigmas[i + 1] - sigma_up * sigma_up); + + // Euler method (CFG++) + // x = denoised + d * sigma_down + { + float* vec_d = (float*)d->data; + float* vec_x = (float*)x->data; + float* vec_denoised = (float*)denoised->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_denoised[j] + vec_d[j] * sigma_down; + } + } + + if (sigmas[i + 1] > 0) { + // x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up + ggml_ext_im_set_randn_f32(noise, rng); + // noise = load_tensor_from_file(work_ctx, "./rand" + std::to_string(i+1) + ".bin"); + { + float* vec_x = (float*)x->data; + float* vec_noise = (float*)noise->data; + + for (int j = 0; j < ggml_nelements(x); j++) { + vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up; + } + } + } + } + } break; default: LOG_ERROR("Attempting to sample with nonexisting sample method %i", method); diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index bbf2f979d..a3c8c6b28 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -72,6 +72,8 @@ const char* sampling_methods_str[] = { "TCD", "Res Multistep", "Res 2s", + "Euler CFG++", + "Euler A CFG++", }; /*================================================== Helper Functions ================================================*/ @@ -261,7 +263,12 @@ static void init_cachedit_runtime(SampleCacheRuntime& runtime, static void init_spectrum_runtime(SampleCacheRuntime& runtime, SDVersion version, const sd_cache_params_t& cache_params, - const std::vector& sigmas) { + const std::vector& sigmas, + sample_method_t method) { + if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) { + LOG_WARN("Spectrum requested but not supported for CFG++ samplers"); + return; + } if (!sd_version_is_unet(version) && !sd_version_is_dit(version)) { LOG_WARN("Spectrum requested but not supported for this model type (only UNET and DiT models)"); return; @@ -289,7 +296,8 @@ static void init_spectrum_runtime(SampleCacheRuntime& runtime, static SampleCacheRuntime init_sample_cache_runtime(SDVersion version, const sd_cache_params_t* cache_params, Denoiser* denoiser, - const std::vector& sigmas) { + const std::vector& sigmas, + sample_method_t method) { SampleCacheRuntime runtime; if (cache_params == nullptr || cache_params->mode == SD_CACHE_DISABLED) { return runtime; @@ -315,7 +323,7 @@ static SampleCacheRuntime init_sample_cache_runtime(SDVersion version, init_cachedit_runtime(runtime, version, *cache_params, sigmas); break; case SD_CACHE_SPECTRUM: - init_spectrum_runtime(runtime, version, *cache_params, sigmas); + init_spectrum_runtime(runtime, version, *cache_params, sigmas, method); break; default: break; @@ -2035,7 +2043,7 @@ class StableDiffusionGGML { img_cfg_scale = cfg_scale; } - SampleCacheRuntime cache_runtime = init_sample_cache_runtime(version, cache_params, denoiser.get(), sigmas); + SampleCacheRuntime cache_runtime = init_sample_cache_runtime(version, cache_params, denoiser.get(), sigmas, method); size_t steps = sigmas.size() - 1; ggml_tensor* x = ggml_ext_dup_and_cpy_tensor(work_ctx, init_latent); @@ -2071,6 +2079,10 @@ class StableDiffusionGGML { out_img_cond = ggml_dup_tensor(work_ctx, x); } ggml_tensor* denoised = ggml_dup_tensor(work_ctx, x); + ggml_tensor* uncond_denoised = nullptr; + if (method == EULER_CFG_PP_SAMPLE_METHOD || method == EULER_A_CFG_PP_SAMPLE_METHOD) { + uncond_denoised = ggml_dup_tensor(work_ctx, x); + } int64_t t0 = ggml_time_us(); @@ -2099,7 +2111,7 @@ class StableDiffusionGGML { } } - auto denoise = [&](ggml_tensor* input, float sigma, int step) -> ggml_tensor* { + auto denoise = [&](ggml_tensor* input, float sigma, int step, ggml_tensor** uncond_out) -> ggml_tensor* { auto sd_preview_cb = sd_get_preview_callback(); auto sd_preview_cb_data = sd_get_preview_callback_data(); auto sd_preview_mode = sd_get_preview_mode(); @@ -2136,24 +2148,26 @@ class StableDiffusionGGML { timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask); if (cache_runtime.spectrum_enabled && cache_runtime.spectrum.should_predict()) { - cache_runtime.spectrum.predict(denoised); + if (uncond_out == nullptr) { + cache_runtime.spectrum.predict(denoised); - if (denoise_mask != nullptr) { - apply_mask(denoised, init_latent, denoise_mask); - } + if (denoise_mask != nullptr) { + apply_mask(denoised, init_latent, denoise_mask); + } - if (sd_preview_cb != nullptr && sd_should_preview_denoised()) { - if (step % sd_get_preview_interval() == 0) { - preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb, sd_preview_cb_data, false); + if (sd_preview_cb != nullptr && sd_should_preview_denoised()) { + if (step % sd_get_preview_interval() == 0) { + preview_image(work_ctx, step, denoised, version, sd_preview_mode, preview_tensor, sd_preview_cb, sd_preview_cb_data, false); + } } - } - int64_t t1 = ggml_time_us(); - if (step > 0 || step == -(int)steps) { - int showstep = std::abs(step); - pretty_progress(showstep, (int)steps, (t1 - t0) / 1000000.f / showstep); + int64_t t1 = ggml_time_us(); + if (step > 0 || step == -(int)steps) { + int showstep = std::abs(step); + pretty_progress(showstep, (int)steps, (t1 - t0) / 1000000.f / showstep); + } + return denoised; } - return denoised; } auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec); @@ -2289,6 +2303,11 @@ class StableDiffusionGGML { float* positive_data = (float*)out_cond->data; int ne_elements = (int)ggml_nelements(denoised); + float* vec_uncond_denoised = nullptr; + if (uncond_out != nullptr) { + vec_uncond_denoised = (float*)uncond_denoised->data; + } + if (shifted_timestep > 0 && sd_version_is_sdxl(version)) { int64_t shifted_t_idx = static_cast(roundf(timesteps_vec[0])); float shifted_sigma = denoiser->t_to_sigma((float)shifted_t_idx); @@ -2303,6 +2322,7 @@ class StableDiffusionGGML { for (int i = 0; i < ne_elements; i++) { float latent_result = positive_data[i]; + float uncond_result = has_unconditioned ? negative_data[i] : positive_data[i]; if (has_unconditioned) { // out_uncond + cfg_scale * (out_cond - out_uncond) if (has_img_cond) { @@ -2322,6 +2342,14 @@ class StableDiffusionGGML { // v = latent_result, eps = latent_result // denoised = (v * c_out + input * c_skip) or (input + eps * c_out) vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip; + + if (vec_uncond_denoised) { + vec_uncond_denoised[i] = uncond_result * c_out + vec_input[i] * c_skip; + } + } + + if (uncond_out != nullptr) { + *uncond_out = uncond_denoised; } if (cache_runtime.spectrum_enabled) { @@ -2521,6 +2549,8 @@ const char* sample_method_to_str[] = { "tcd", "res_multistep", "res_2s", + "euler_cfg_pp", + "euler_a_cfg_pp", }; const char* sd_sample_method_name(enum sample_method_t sample_method) {