feat: increase work_ctx memory buffer size (#814)

2026-06-23 22:56:42 +00:00 · 2025-09-14 13:19:20 +08:00 · 2025-09-14 13:19:20 +08:00 · dc46993b55
commit dc46993b55
parent a6a8569ea0
5 changed files with 7 additions and 21 deletions
--- a/conditioner.hpp
+++ b/conditioner.hpp
@@ -115,7 +115,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
            return true;
        }
        struct ggml_init_params params;
-        params.mem_size               = 10 * 1024 * 1024;  // max for custom embeddings 10 MB
+        params.mem_size               = 100 * 1024 * 1024;  // max for custom embeddings 100 MB
        params.mem_buffer             = NULL;
        params.no_alloc               = false;
        struct ggml_context* embd_ctx = ggml_init(params);
--- a/preprocessing.hpp
+++ b/preprocessing.hpp
@@ -164,7 +164,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo

 uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
    struct ggml_init_params params;
-    params.mem_size               = static_cast<size_t>(10 * 1024 * 1024);  // 10
+    params.mem_size               = static_cast<size_t>(10 * 1024 * 1024);  // 10MB
    params.mem_buffer             = NULL;
    params.no_alloc               = false;
    struct ggml_context* work_ctx = ggml_init(params);
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -2196,19 +2196,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
    }

    struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
-    if (sd_version_is_sd3(sd_ctx->sd->version)) {
-        params.mem_size *= 3;
-    }
-    if (sd_version_is_flux(sd_ctx->sd->version)) {
-        params.mem_size *= 4;
-    }
-    if (sd_ctx->sd->stacked_id) {
-        params.mem_size += static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
-    }
-    params.mem_size += width * height * 3 * sizeof(float) * 3;
-    params.mem_size += width * height * 3 * sizeof(float) * 3 * sd_img_gen_params->ref_images_count;
-    params.mem_size *= sd_img_gen_params->batch_count;
+    params.mem_size = static_cast<size_t>(1024 * 1024) * 1024;  // 1G
    params.mem_buffer = NULL;
    params.no_alloc   = false;
    // LOG_DEBUG("mem_size %u ", params.mem_size);
@@ -2448,8 +2436,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
    }

    struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(200 * 1024) * 1024;  // 200 MB
-    params.mem_size += width * height * frames * 3 * sizeof(float) * 2;
+    params.mem_size = static_cast<size_t>(1024 * 1024) * 1024;  // 1GB
    params.mem_buffer = NULL;
    params.no_alloc   = false;
    // LOG_DEBUG("mem_size %u ", params.mem_size);
--- a/upscaler.cpp
+++ b/upscaler.cpp
@@ -69,8 +69,7 @@ struct UpscalerGGML {
                 input_image.width, input_image.height, output_width, output_height);

        struct ggml_init_params params;
-        params.mem_size = output_width * output_height * 3 * sizeof(float) * 2;
-        params.mem_size += 2 * ggml_tensor_overhead();
+        params.mem_size = static_cast<size_t>(1024 * 1024) * 1024;  // 1G
        params.mem_buffer = NULL;
        params.no_alloc   = false;

@@ -80,7 +79,7 @@ struct UpscalerGGML {
            LOG_ERROR("ggml_init() failed");
            return upscaled_image;
        }
-        LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
+        // LOG_DEBUG("upscale work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
        ggml_tensor* input_image_tensor = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, input_image.width, input_image.height, 3, 1);
        sd_image_to_tensor(input_image.data, input_image_tensor);

--- a/wan.hpp
+++ b/wan.hpp
@@ -1219,7 +1219,7 @@ namespace WAN {

        void test() {
            struct ggml_init_params params;
-            params.mem_size   = static_cast<size_t>(1000 * 1024 * 1024);  // 10 MB
+            params.mem_size = static_cast<size_t>(1024 * 1024) * 1024;  // 1G
            params.mem_buffer = NULL;
            params.no_alloc   = false;