fix: avoid writable mmap for read-only weights

This commit is contained in:
leejet 2026-06-23 00:26:20 +08:00
parent 41f7acbfb0
commit 2666e2a658
6 changed files with 24 additions and 22 deletions

View File

@ -261,8 +261,8 @@ bool parse_options(int argc, const char** argv, const std::vector<ArgOptions>& o
invalid_arg = true; invalid_arg = true;
return; return;
} }
if(option.concat && !option.target->empty()){ if (option.concat && !option.target->empty()) {
if(option.concat > 0 && option.concat <= 0xff){ if (option.concat > 0 && option.concat <= 0xff) {
*option.target += static_cast<char>(option.concat); *option.target += static_cast<char>(option.concat);
} }
*option.target += argv_to_utf8(i, argv); *option.target += argv_to_utf8(i, argv);

View File

@ -480,7 +480,7 @@ bool ModelManager::mmap_params(const std::vector<TensorState*>& states,
return true; return true;
} }
auto mmap_store = model_loader_.mmap_tensors(mmap_candidates, {}, true); auto mmap_store = model_loader_.mmap_tensors(mmap_candidates, {}, writable_mmap_);
if (mmap_store.empty()) { if (mmap_store.empty()) {
return true; return true;
} }

View File

@ -69,6 +69,7 @@ private:
uint64_t current_lora_epoch_ = 0; uint64_t current_lora_epoch_ = 0;
int n_threads_ = 0; int n_threads_ = 0;
bool enable_mmap_ = false; bool enable_mmap_ = false;
bool writable_mmap_ = false;
void finish_compute_backend_usage(const std::vector<TensorState*>& states); void finish_compute_backend_usage(const std::vector<TensorState*>& states);
void release_all(); void release_all();
@ -110,6 +111,7 @@ public:
model_loader_.set_n_threads(n_threads); model_loader_.set_n_threads(n_threads);
} }
void set_enable_mmap(bool enable_mmap) { enable_mmap_ = enable_mmap; } void set_enable_mmap(bool enable_mmap) { enable_mmap_ = enable_mmap; }
void set_writable_mmap(bool writable_mmap) { writable_mmap_ = writable_mmap; }
void set_common_ignore_tensors(std::set<std::string> ignore_tensors); void set_common_ignore_tensors(std::set<std::string> ignore_tensors);
void set_loras(std::vector<LoraSpec> loras, SDVersion version); void set_loras(std::vector<LoraSpec> loras, SDVersion version);

View File

@ -3,9 +3,9 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <optional>
#include <string> #include <string>
#include <utility> #include <utility>
#include <optional>
#include "core/util.h" #include "core/util.h"

View File

@ -3,8 +3,8 @@
#include <cstddef> #include <cstddef>
#include <functional> #include <functional>
#include <vector>
#include <optional> #include <optional>
#include <vector>
#include "core/tensor.hpp" #include "core/tensor.hpp"

View File

@ -532,7 +532,6 @@ public:
if (wtype != GGML_TYPE_COUNT || tensor_type_rules.size() > 0) { if (wtype != GGML_TYPE_COUNT || tensor_type_rules.size() > 0) {
model_loader.set_wtype_override(wtype, tensor_type_rules); model_loader.set_wtype_override(wtype, tensor_type_rules);
} }
model_loader.process_model_files(enable_mmap, true);
std::map<ggml_type, uint32_t> wtype_stat = model_loader.get_wtype_stat(); std::map<ggml_type, uint32_t> wtype_stat = model_loader.get_wtype_stat();
std::map<ggml_type, uint32_t> conditioner_wtype_stat = model_loader.get_conditioner_wtype_stat(); std::map<ggml_type, uint32_t> conditioner_wtype_stat = model_loader.get_conditioner_wtype_stat();
@ -586,9 +585,12 @@ public:
apply_lora_immediately = false; apply_lora_immediately = false;
} }
bool needs_writable_mmap = enable_mmap && apply_lora_immediately;
model_manager->set_writable_mmap(needs_writable_mmap);
if (enable_mmap && apply_lora_immediately) { if (enable_mmap && apply_lora_immediately) {
LOG_WARN("in mode 'immediately', LoRAs will cause extra memory usage with mmap"); LOG_WARN("in mode 'immediately', LoRAs will cause extra memory usage with mmap");
} }
model_loader.process_model_files(enable_mmap, needs_writable_mmap);
load_alphas_cumprod(model_loader); load_alphas_cumprod(model_loader);
size_t text_encoder_params_mem_size = 0; size_t text_encoder_params_mem_size = 0;
@ -1943,24 +1945,24 @@ public:
bool slg_uncond = sd::guidance::parse_skip_layer_guidance_uncond_arg(extra_sample_args); bool slg_uncond = sd::guidance::parse_skip_layer_guidance_uncond_arg(extra_sample_args);
std::vector<float> guidance_schedule = sd::guidance::parse_guidance_schedule(extra_sample_args); std::vector<float> guidance_schedule = sd::guidance::parse_guidance_schedule(extra_sample_args);
if(!guidance_schedule.empty() && guidance_schedule.size() != sigmas.size() - 1) { if (!guidance_schedule.empty() && guidance_schedule.size() != sigmas.size() - 1) {
if(guidance_schedule.size() > sigmas.size()) { if (guidance_schedule.size() > sigmas.size()) {
LOG_WARN("guidance_schedule length (%zu) is greater than number of steps (%zu)", guidance_schedule.size(), sigmas.size() - 1); LOG_WARN("guidance_schedule length (%zu) is greater than number of steps (%zu)", guidance_schedule.size(), sigmas.size() - 1);
LOG_WARN("truncating guidance_schedule to match step count"); LOG_WARN("truncating guidance_schedule to match step count");
guidance_schedule.resize(sigmas.size() - 1); guidance_schedule.resize(sigmas.size() - 1);
} else { } else {
LOG_INFO("padding guidance_schedule with cfg_scale"); LOG_INFO("padding guidance_schedule with cfg_scale");
while(guidance_schedule.size() < sigmas.size() - 1) { while (guidance_schedule.size() < sigmas.size() - 1) {
guidance_schedule.push_back(cfg_scale); guidance_schedule.push_back(cfg_scale);
} }
} }
} }
if(!guidance_schedule.empty()) { if (!guidance_schedule.empty()) {
std::string schedule_str = "["; std::string schedule_str = "[";
for(size_t i = 0; i < guidance_schedule.size(); ++i) { for (size_t i = 0; i < guidance_schedule.size(); ++i) {
schedule_str += std::to_string(guidance_schedule[i]); schedule_str += std::to_string(guidance_schedule[i]);
if(i < guidance_schedule.size() - 1) { if (i < guidance_schedule.size() - 1) {
schedule_str += ", "; schedule_str += ", ";
} }
} }
@ -2208,9 +2210,7 @@ public:
guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out; guidance_input.pred_uncond = uncond_out.empty() ? nullptr : &uncond_out;
guidance_input.pred_img_uncond = img_uncond_out.empty() ? nullptr : &img_uncond_out; guidance_input.pred_img_uncond = img_uncond_out.empty() ? nullptr : &img_uncond_out;
sd::guidance::GuiderOutput guided = guidance_schedule.empty()? sd::guidance::GuiderOutput guided = guidance_schedule.empty() ? primary_guidance.forward(guidance_input, {}) : primary_guidance.forward(guidance_input, {}, guidance_schedule[guidance_schedule.size() - 1 - step]);
primary_guidance.forward(guidance_input, {}):
primary_guidance.forward(guidance_input, {}, guidance_schedule[guidance_schedule.size() - 1 - step]);
if (guided.pred.empty()) { if (guided.pred.empty()) {
return {}; return {};
} }