Mirror of https://github.com/leejet/stable-diffusion.cpp.git
Compare commits
No commits in common. "7dac89ad751741038bef623a387e280c8e5c609d" and "23de7fc44a9a93beff02ff8382307baacbffff1e" have entirely different histories.
7dac89ad75 ... 23de7fc44a

.github/workflows/build.yml (vendored, 2 changes)

@@ -146,7 +146,7 @@ jobs:
           sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip

   windows-latest-cmake:
-    runs-on: windows-2025
+    runs-on: windows-2019

     env:
       VULKAN_VERSION: 1.3.261.1

@@ -57,7 +57,7 @@ public:
        auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);

        x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST);  // [N, channels, h*2, w*2]
        x = conv->forward(ctx, x);                              // [N, out_channels, h*2, w*2]
        return x;
    }
};

denoiser.hpp (59 changes)

@@ -168,21 +168,24 @@ struct AYSSchedule : SigmaSchedule {
         std::vector<float> inputs;
         std::vector<float> results(n + 1);

-        if (sd_version_is_sd2((SDVersion)version)) {
-            LOG_WARN("AYS not designed for SD2.X models");
-        } /* fallthrough */
-        else if (sd_version_is_sd1((SDVersion)version)) {
-            LOG_INFO("AYS using SD1.5 noise levels");
-            inputs = noise_levels[0];
-        } else if (sd_version_is_sdxl((SDVersion)version)) {
-            LOG_INFO("AYS using SDXL noise levels");
-            inputs = noise_levels[1];
-        } else if (version == VERSION_SVD) {
-            LOG_INFO("AYS using SVD noise levels");
-            inputs = noise_levels[2];
-        } else {
-            LOG_ERROR("Version not compatible with AYS scheduler");
-            return results;
-        }
+        switch (version) {
+            case VERSION_SD2: /* fallthrough */
+                LOG_WARN("AYS not designed for SD2.X models");
+            case VERSION_SD1:
+                LOG_INFO("AYS using SD1.5 noise levels");
+                inputs = noise_levels[0];
+                break;
+            case VERSION_SDXL:
+                LOG_INFO("AYS using SDXL noise levels");
+                inputs = noise_levels[1];
+                break;
+            case VERSION_SVD:
+                LOG_INFO("AYS using SVD noise levels");
+                inputs = noise_levels[2];
+                break;
+            default:
+                LOG_ERROR("Version not compatible with AYS scheduler");
+                return results;
+        }

         /* Stretches those pre-calculated reference levels out to the desired

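The comment that closes the hunk above refers to stretching a fixed table of reference noise levels out to the requested number of steps. A minimal standalone sketch of log-linear interpolation over such a table, the usual way this kind of stretching is done; the reference values below are made up for illustration and are not the tables stored in noise_levels:

// Standalone sketch: stretch a short table of reference sigmas to n output
// sigmas by interpolating in log space. Generic illustration only; the
// reference values are invented and this is not code from denoiser.hpp.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

std::vector<float> stretch(const std::vector<float>& ref, int n) {
    std::vector<float> out(n);
    for (int i = 0; i < n; i++) {
        float pos  = (float)i * (ref.size() - 1) / (float)(n - 1);  // fractional index into ref
        int lo     = (int)pos;
        int hi     = std::min(lo + 1, (int)ref.size() - 1);
        float frac = pos - lo;
        // log-space interpolation keeps the ratio between neighbouring sigmas smooth
        out[i] = std::exp((1.0f - frac) * std::log(ref[lo]) + frac * std::log(ref[hi]));
    }
    return out;
}

int main() {
    std::vector<float> ref = {14.6f, 6.5f, 2.9f, 1.3f, 0.6f, 0.25f, 0.1f, 0.03f};  // made-up levels
    for (float s : stretch(ref, 11)) printf("%.4f ", s);
    printf("\n");
    return 0;
}

Interpolating in log space matters because the sigmas span several orders of magnitude.
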
@@ -343,32 +346,6 @@ struct CompVisVDenoiser : public CompVisDenoiser {
     }
 };

-struct EDMVDenoiser : public CompVisVDenoiser {
-    float min_sigma = 0.002;
-    float max_sigma = 120.0;
-
-    EDMVDenoiser(float min_sigma = 0.002, float max_sigma = 120.0)
-        : min_sigma(min_sigma), max_sigma(max_sigma) {
-        schedule = std::make_shared<ExponentialSchedule>();
-    }
-
-    float t_to_sigma(float t) {
-        return std::exp(t * 4 / (float)TIMESTEPS);
-    }
-
-    float sigma_to_t(float s) {
-        return 0.25 * std::log(s);
-    }
-
-    float sigma_min() {
-        return min_sigma;
-    }
-
-    float sigma_max() {
-        return max_sigma;
-    }
-};
-
 float time_snr_shift(float alpha, float t) {
     if (alpha == 1.0f) {
         return t;

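A quick numeric check of the EDMVDenoiser mappings shown in the hunk above: composing the two functions gives sigma_to_t(t_to_sigma(t)) = 0.25 * (4 * t / TIMESTEPS) = t / TIMESTEPS, i.e. a timestep normalised to [0, 1]. Standalone sketch, assuming TIMESTEPS is 1000 as in the usual DDPM setup:

#include <cmath>
#include <cstdio>

int main() {
    const float TIMESTEPS = 1000.0f;  // assumption for this sketch
    auto t_to_sigma = [&](float t) { return std::exp(t * 4 / TIMESTEPS); };
    auto sigma_to_t = [&](float s) { return 0.25f * std::log(s); };

    float t     = 250.0f;
    float sigma = t_to_sigma(t);                                // exp(1) ~ 2.718
    printf("sigma          = %f\n", sigma);
    printf("round trip     = %f\n", sigma_to_t(sigma));         // 0.25, i.e. t / TIMESTEPS
    printf("sigma at t = T = %f\n", t_to_sigma(TIMESTEPS));     // exp(4) ~ 54.6
    return 0;
}
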
@@ -118,7 +118,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                                        a->ne[1] * b->ne[1],
                                        a->ne[2] * b->ne[2],
                                        a->ne[3] * b->ne[3],
                                        GGML_SCALE_MODE_NEAREST),
                    b);
}

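The fragment above appears to assemble a Kronecker product by upscaling a to a->ne[i] * b->ne[i] with nearest-neighbour mode and then combining the result with b. The identity behind that approach: repeating every element of a over a block the size of b and multiplying element-wise by a tiled copy of b gives a ⊗ b. A plain C++ illustration for two 2x2 matrices, independent of ggml:

#include <cstdio>

int main() {
    float a[2][2] = {{1, 2}, {3, 4}};
    float b[2][2] = {{0, 5}, {6, 7}};
    float k[4][4];

    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            float a_rep = a[i / 2][j / 2];  // "nearest upscaled" a: each element repeated as a 2x2 block
            float b_til = b[i % 2][j % 2];  // b tiled across the upscaled grid
            k[i][j]     = a_rep * b_til;    // (a ⊗ b)[i][j]
        }
    }

    for (int i = 0; i < 4; i++)
        printf("%5.1f %5.1f %5.1f %5.1f\n", k[i][0], k[i][1], k[i][2], k[i][3]);
    return 0;
}
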
@@ -602,8 +602,6 @@ typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;

// Tiling
__STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const int scale, const int tile_size, const float tile_overlap_factor, on_tile_process on_processing) {
    output = ggml_set_f32(output, 0);

    int input_width  = (int)input->ne[0];
    int input_height = (int)input->ne[1];
    int output_width = (int)output->ne[0];

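To make the tile_size and tile_overlap_factor parameters above concrete, here is a self-contained sketch of one common way overlapping tiles are laid out along a single axis; this is generic tiling arithmetic, not a claim about how sd_tiling itself places tiles:

#include <algorithm>
#include <cstdio>

int main() {
    const int   input_width  = 96;
    const int   tile_size    = 32;
    const float overlap_frac = 0.5f;                              // plays the role of tile_overlap_factor
    const int   overlap      = (int)(tile_size * overlap_frac);   // 16 px shared between neighbours
    const int   stride       = tile_size - overlap;               // 16 px advance per tile

    for (int x = 0; x < input_width; x += stride) {
        int x0 = std::min(x, input_width - tile_size);  // clamp the last tile to the edge
        printf("tile covers [%d, %d)\n", x0, x0 + tile_size);
        if (x0 == input_width - tile_size) break;
    }
    return 0;
}

Overlapping tiles are typically blended across the shared region to hide seams in the assembled output.
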
@@ -103,9 +103,6 @@ public:
     bool vae_tiling = false;
     bool stacked_id = false;

-    bool is_using_v_parameterization = false;
-    bool is_using_edm_v_parameterization = false;
-
     std::map<std::string, struct ggml_tensor*> tensors;

     std::string lora_model_dir;

@@ -546,17 +543,12 @@ public:
        LOG_INFO("loading model from '%s' completed, taking %.2fs", model_path.c_str(), (t1 - t0) * 1.0f / 1000);

        // check is_using_v_parameterization_for_sd2

        bool is_using_v_parameterization = false;
        if (sd_version_is_sd2(version)) {
            if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
                is_using_v_parameterization = true;
            }
        } else if (sd_version_is_sdxl(version)) {
            if (model_loader.tensor_storages_types.find("edm_vpred.sigma_max") != model_loader.tensor_storages_types.end()) {
                // CosXL models
                // TODO: get sigma_min and sigma_max values from file
                is_using_edm_v_parameterization = true;
            }
            if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) {
                is_using_v_parameterization = true;
            }

@@ -581,9 +573,6 @@ public:
         } else if (is_using_v_parameterization) {
             LOG_INFO("running in v-prediction mode");
             denoiser = std::make_shared<CompVisVDenoiser>();
-        } else if (is_using_edm_v_parameterization) {
-            LOG_INFO("running in v-prediction EDM mode");
-            denoiser = std::make_shared<EDMVDenoiser>();
         } else {
             LOG_INFO("running in eps-prediction mode");
         }

@@ -1407,7 +1396,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
     SDCondition uncond;
     if (cfg_scale != 1.0) {
         bool force_zero_embeddings = false;
-        if (sd_version_is_sdxl(sd_ctx->sd->version) && negative_prompt.size() == 0 && !sd_ctx->sd->is_using_edm_v_parameterization) {
+        if (sd_version_is_sdxl(sd_ctx->sd->version) && negative_prompt.size() == 0) {
             force_zero_embeddings = true;
         }
         uncond = sd_ctx->sd->cond_stage_model->get_learned_condition(work_ctx,

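Background for the cfg_scale != 1.0 guard above: the unconditional pass is only needed because classifier-free guidance blends the two predictions as guided = uncond + cfg_scale * (cond - uncond), which collapses to the conditional prediction when cfg_scale == 1. A toy scalar illustration, not the project's sampling code:

#include <cstdio>

int main() {
    const float cond = 0.8f, uncond = 0.2f;     // stand-ins for two model outputs
    const float scales[] = {1.0f, 4.0f, 7.0f};
    for (float cfg_scale : scales) {
        float guided = uncond + cfg_scale * (cond - uncond);
        printf("cfg_scale = %.1f -> guided = %.2f\n", cfg_scale, guided);  // 1.0 reproduces cond
    }
    return 0;
}
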
@@ -1566,29 +1555,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx,
     return result_images;
 }

-ggml_tensor* generate_init_latent(sd_ctx_t* sd_ctx,
-                                  ggml_context* work_ctx,
-                                  int width,
-                                  int height) {
-    int C = 4;
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        C = 16;
-    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
-        C = 16;
-    }
-    int W = width / 8;
-    int H = height / 8;
-    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        ggml_set_f32(init_latent, 0.0609f);
-    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
-        ggml_set_f32(init_latent, 0.1159f);
-    } else {
-        ggml_set_f32(init_latent, 0.f);
-    }
-    return init_latent;
-}
-
 sd_image_t* txt2img(sd_ctx_t* sd_ctx,
                     const char* prompt_c_str,
                     const char* negative_prompt_c_str,

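As a worked example of the latent geometry set up by generate_init_latent in the hunk above: for a hypothetical 512x512 request, W = 512 / 8 = 64 and H = 512 / 8 = 64, so the initial latent is a [64, 64, C, 1] F32 tensor with C = 16 for SD3 and Flux and C = 4 otherwise, filled with 0.0609, 0.1159 or 0.0 respectively. A standalone arithmetic check:

#include <cstdio>

int main() {
    const int width = 512, height = 512;      // hypothetical request, not from the source
    const int W = width / 8, H = height / 8;  // 64 x 64 latent resolution
    printf("SD1/SD2/SDXL: [%d, %d, 4, 1]  filled with 0.0\n",    W, H);
    printf("SD3:          [%d, %d, 16, 1] filled with 0.0609\n", W, H);
    printf("Flux:         [%d, %d, 16, 1] filled with 0.1159\n", W, H);
    return 0;
}
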
@@ -1645,12 +1611,27 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
     std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);

+    int C = 4;
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        C = 16;
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        C = 16;
+    }
+    int W = width / 8;
+    int H = height / 8;
+    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.0609f);
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.1159f);
+    } else {
+        ggml_set_f32(init_latent, 0.f);
+    }
+
     if (sd_version_is_inpaint(sd_ctx->sd->version)) {
         LOG_WARN("This is an inpainting model, this should only be used in img2img mode with a mask");
     }

-    ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
-
     sd_image_t* result_images = generate_image(sd_ctx,
                                                work_ctx,
                                                init_latent,

@@ -2054,6 +2035,23 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
     }
     sd_ctx->sd->rng->manual_seed(seed);

+    int C = 4;
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        C = 16;
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        C = 16;
+    }
+    int W = width / 8;
+    int H = height / 8;
+    ggml_tensor* init_latent = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
+    if (sd_version_is_sd3(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.0609f);
+    } else if (sd_version_is_flux(sd_ctx->sd->version)) {
+        ggml_set_f32(init_latent, 0.1159f);
+    } else {
+        ggml_set_f32(init_latent, 0.f);
+    }
+
     size_t t0 = ggml_time_ms();

     std::vector<struct ggml_tensor*> ref_latents;

@@ -2076,8 +2074,6 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
     std::vector<float> sigmas = sd_ctx->sd->denoiser->get_sigmas(sample_steps);

-    ggml_tensor* init_latent = generate_init_latent(sd_ctx, work_ctx, width, height);
-
     sd_image_t* result_images = generate_image(sd_ctx,
                                                work_ctx,
                                                init_latent,