fix: avoid writable mmap for read-only weights (#1698)

2026-06-23 14:46:39 +00:00 · 2026-06-23 00:39:31 +08:00 · 2026-06-23 00:39:31 +08:00 · f440ad9c29
commit f440ad9c29
parent 41f7acbfb0
6 changed files with 24 additions and 22 deletions
--- a/examples/common/common.cpp
+++ b/examples/common/common.cpp
@ -261,8 +261,8 @@ bool parse_options(int argc, const char** argv, const std::vector<ArgOptions>& o
                        invalid_arg = true;
                        return;
                    }
-                    if(option.concat && !option.target->empty()){
-                        if(option.concat > 0 && option.concat <= 0xff){
+                    if (option.concat && !option.target->empty()) {
+                        if (option.concat > 0 && option.concat <= 0xff) {
                            *option.target += static_cast<char>(option.concat);
                        }
                        *option.target += argv_to_utf8(i, argv);
--- a/src/model_manager.cpp
+++ b/src/model_manager.cpp
@ -480,7 +480,7 @@ bool ModelManager::mmap_params(const std::vector<TensorState*>& states,
        return true;
    }

-    auto mmap_store = model_loader_.mmap_tensors(mmap_candidates, {}, true);
+    auto mmap_store = model_loader_.mmap_tensors(mmap_candidates, {}, writable_mmap_);
    if (mmap_store.empty()) {
        return true;
    }
--- a/src/model_manager.h
+++ b/src/model_manager.h
@ -69,6 +69,7 @@ private:
    uint64_t current_lora_epoch_ = 0;
    int n_threads_               = 0;
    bool enable_mmap_            = false;
+    bool writable_mmap_          = false;

    void finish_compute_backend_usage(const std::vector<TensorState*>& states);
    void release_all();
@ -110,6 +111,7 @@ public:
        model_loader_.set_n_threads(n_threads);
    }
    void set_enable_mmap(bool enable_mmap) { enable_mmap_ = enable_mmap; }
+    void set_writable_mmap(bool writable_mmap) { writable_mmap_ = writable_mmap; }
    void set_common_ignore_tensors(std::set<std::string> ignore_tensors);
    void set_loras(std::vector<LoraSpec> loras, SDVersion version);

--- a/src/runtime/guidance.cpp
+++ b/src/runtime/guidance.cpp
@ -3,9 +3,9 @@
 #include <algorithm>
 #include <cmath>
 #include <cstdlib>
+#include <optional>
 #include <string>
 #include <utility>
-#include <optional>

 #include "core/util.h"

--- a/src/runtime/guidance.h
+++ b/src/runtime/guidance.h
@ -3,8 +3,8 @@

 #include <cstddef>
 #include <functional>
-#include <vector>
 #include <optional>
+#include <vector>

 #include "core/tensor.hpp"

--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@ -532,7 +532,6 @@ public:
        if (wtype != GGML_TYPE_COUNT || tensor_type_rules.size() > 0) {
            model_loader.set_wtype_override(wtype, tensor_type_rules);
        }
-        model_loader.process_model_files(enable_mmap, true);

        std::map<ggml_type, uint32_t> wtype_stat                 = model_loader.get_wtype_stat();
        std::map<ggml_type, uint32_t> conditioner_wtype_stat     = model_loader.get_conditioner_wtype_stat();
@ -586,9 +585,12 @@ public:
            apply_lora_immediately = false;
        }

+        bool needs_writable_mmap = enable_mmap && apply_lora_immediately;
+        model_manager->set_writable_mmap(needs_writable_mmap);
        if (enable_mmap && apply_lora_immediately) {
            LOG_WARN("in mode 'immediately', LoRAs will cause extra memory usage with mmap");
        }
+        model_loader.process_model_files(enable_mmap, needs_writable_mmap);
        load_alphas_cumprod(model_loader);

        size_t text_encoder_params_mem_size = 0;
@ -1943,24 +1945,24 @@ public:
        bool slg_uncond     = sd::guidance::parse_skip_layer_guidance_uncond_arg(extra_sample_args);

        std::vector<float> guidance_schedule = sd::guidance::parse_guidance_schedule(extra_sample_args);
-        if(!guidance_schedule.empty() && guidance_schedule.size() != sigmas.size() - 1) {
-            if(guidance_schedule.size() > sigmas.size()) {
+        if (!guidance_schedule.empty() && guidance_schedule.size() != sigmas.size() - 1) {
+            if (guidance_schedule.size() > sigmas.size()) {
                LOG_WARN("guidance_schedule length (%zu) is greater than number of steps (%zu)", guidance_schedule.size(), sigmas.size() - 1);
                LOG_WARN("truncating guidance_schedule to match step count");
                guidance_schedule.resize(sigmas.size() - 1);
            } else {
                LOG_INFO("padding guidance_schedule with cfg_scale");
-                while(guidance_schedule.size() < sigmas.size() - 1) {
+                while (guidance_schedule.size() < sigmas.size() - 1) {
                    guidance_schedule.push_back(cfg_scale);
                }
            }
        }

-        if(!guidance_schedule.empty()) {
+        if (!guidance_schedule.empty()) {
            std::string schedule_str = "[";
-            for(size_t i = 0; i < guidance_schedule.size(); ++i) {
+            for (size_t i = 0; i < guidance_schedule.size(); ++i) {
                schedule_str += std::to_string(guidance_schedule[i]);
-                if(i < guidance_schedule.size() - 1) {
+                if (i < guidance_schedule.size() - 1) {
                    schedule_str += ", ";
                }
            }
@ -2208,9 +2210,7 @@ public:
            guidance_input.pred_uncond     = uncond_out.empty() ? nullptr : &uncond_out;
            guidance_input.pred_img_uncond = img_uncond_out.empty() ? nullptr : &img_uncond_out;

-            sd::guidance::GuiderOutput guided =  guidance_schedule.empty()? 
-                                            primary_guidance.forward(guidance_input, {}):
-                                            primary_guidance.forward(guidance_input, {}, guidance_schedule[guidance_schedule.size() - 1 - step]);
+            sd::guidance::GuiderOutput guided = guidance_schedule.empty() ? primary_guidance.forward(guidance_input, {}) : primary_guidance.forward(guidance_input, {}, guidance_schedule[guidance_schedule.size() - 1 - step]);
            if (guided.pred.empty()) {
                return {};
            }