refactor: route all runner params through model manager (#1649)

This commit is contained in:
leejet 2026-06-14 02:05:23 +08:00 committed by GitHub
parent 9b0fceb41b
commit 8d4c7af95b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
36 changed files with 596 additions and 677 deletions

View File

@ -1,4 +1,4 @@
#ifndef __SD_CONDITIONING_CONDITIONER_HPP__ #ifndef __SD_CONDITIONING_CONDITIONER_HPP__
#define __SD_CONDITIONING_CONDITIONER_HPP__ #define __SD_CONDITIONING_CONDITIONER_HPP__
#include <cmath> #include <cmath>
@ -118,7 +118,6 @@ public:
virtual void set_stream_layers_enabled(bool enabled) {} virtual void set_stream_layers_enabled(bool enabled) {}
virtual void set_flash_attention_enabled(bool enabled) = 0; virtual void set_flash_attention_enabled(bool enabled) = 0;
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {} virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
virtual void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) {}
virtual void runner_done() {} virtual void runner_done() {}
}; };
@ -137,10 +136,10 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
std::map<std::string, std::pair<int, int>> embedding_pos_map; std::map<std::string, std::pair<int, int>> embedding_pos_map;
FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend, FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::map<std::string, std::string>& orig_embedding_map, const std::map<std::string, std::string>& orig_embedding_map,
SDVersion version = VERSION_SD1) SDVersion version = VERSION_SD1,
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: version(version), tokenizer(sd_version_is_sd2(version) ? 0 : 49407) { : version(version), tokenizer(sd_version_is_sd2(version) ? 0 : 49407) {
for (const auto& kv : orig_embedding_map) { for (const auto& kv : orig_embedding_map) {
std::string name = kv.first; std::string name = kv.first;
@ -150,12 +149,12 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
} }
bool force_clip_f32 = !embedding_map.empty(); bool force_clip_f32 = !embedding_map.empty();
if (sd_version_is_sd1(version)) { if (sd_version_is_sd1(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32); text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32, weight_manager);
} else if (sd_version_is_sd2(version)) { } else if (sd_version_is_sd2(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32); text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32, weight_manager);
} else if (sd_version_is_sdxl(version)) { } else if (sd_version_is_sdxl(version)) {
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32); text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32, weight_manager);
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32); text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32, weight_manager);
} }
} }
@ -194,13 +193,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
} }
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
text_model->set_weight_manager(manager);
if (sd_version_is_sdxl(version)) {
text_model2->set_weight_manager(manager);
}
}
void runner_done() override { void runner_done() override {
text_model->runner_done(); text_model->runner_done();
if (sd_version_is_sdxl(version)) { if (sd_version_is_sdxl(version)) {
@ -522,9 +514,9 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
std::string weight_prefix = "cond_stage_model.transformer"; std::string weight_prefix = "cond_stage_model.transformer";
FrozenCLIPVisionEmbedder(ggml_backend_t backend, FrozenCLIPVisionEmbedder(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map = {},
const String2TensorStorage& tensor_storage_map = {}) std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, params_backend) { : GGMLRunner(backend, weight_manager) {
bool proj_in = false; bool proj_in = false;
for (const auto& [name, tensor_storage] : tensor_storage_map) { for (const auto& [name, tensor_storage] : tensor_storage_map) {
if (!starts_with(name, weight_prefix)) { if (!starts_with(name, weight_prefix)) {
@ -580,8 +572,8 @@ struct SD3CLIPEmbedder : public Conditioner {
std::shared_ptr<T5Runner> t5; std::shared_ptr<T5Runner> t5;
SD3CLIPEmbedder(ggml_backend_t backend, SD3CLIPEmbedder(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map = {},
const String2TensorStorage& tensor_storage_map = {}) std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: clip_g_tokenizer(0) { : clip_g_tokenizer(0) {
bool use_clip_l = false; bool use_clip_l = false;
bool use_clip_g = false; bool use_clip_g = false;
@ -600,13 +592,13 @@ struct SD3CLIPEmbedder : public Conditioner {
return; return;
} }
if (use_clip_l) { if (use_clip_l) {
clip_l = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false); clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, false, weight_manager);
} }
if (use_clip_g) { if (use_clip_g) {
clip_g = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false); clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, false, weight_manager);
} }
if (use_t5) { if (use_t5) {
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer"); t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", false, weight_manager);
} }
} }
@ -670,18 +662,6 @@ struct SD3CLIPEmbedder : public Conditioner {
} }
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
if (clip_l) {
clip_l->set_weight_manager(manager);
}
if (clip_g) {
clip_g->set_weight_manager(manager);
}
if (t5) {
t5->set_weight_manager(manager);
}
}
void runner_done() override { void runner_done() override {
if (clip_l) { if (clip_l) {
clip_l->runner_done(); clip_l->runner_done();
@ -961,8 +941,8 @@ struct FluxCLIPEmbedder : public Conditioner {
size_t chunk_len = 256; size_t chunk_len = 256;
FluxCLIPEmbedder(ggml_backend_t backend, FluxCLIPEmbedder(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map = {},
const String2TensorStorage& tensor_storage_map = {}) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
bool use_clip_l = false; bool use_clip_l = false;
bool use_t5 = false; bool use_t5 = false;
for (auto pair : tensor_storage_map) { for (auto pair : tensor_storage_map) {
@ -979,12 +959,12 @@ struct FluxCLIPEmbedder : public Conditioner {
} }
if (use_clip_l) { if (use_clip_l) {
clip_l = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true); clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, false, weight_manager);
} else { } else {
LOG_WARN("clip_l text encoder not found! Prompt adherence might be degraded."); LOG_WARN("clip_l text encoder not found! Prompt adherence might be degraded.");
} }
if (use_t5) { if (use_t5) {
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer"); t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", false, weight_manager);
} else { } else {
LOG_WARN("t5xxl text encoder not found! Prompt adherence might be degraded."); LOG_WARN("t5xxl text encoder not found! Prompt adherence might be degraded.");
} }
@ -1035,15 +1015,6 @@ struct FluxCLIPEmbedder : public Conditioner {
} }
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
if (clip_l) {
clip_l->set_weight_manager(manager);
}
if (t5) {
t5->set_weight_manager(manager);
}
}
void runner_done() override { void runner_done() override {
if (clip_l) { if (clip_l) {
clip_l->runner_done(); clip_l->runner_done();
@ -1219,11 +1190,11 @@ struct T5CLIPEmbedder : public Conditioner {
bool is_umt5 = false; bool is_umt5 = false;
T5CLIPEmbedder(ggml_backend_t backend, T5CLIPEmbedder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
bool use_mask = false, bool use_mask = false,
int mask_pad = 0, int mask_pad = 0,
bool is_umt5 = false) bool is_umt5 = false,
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) { : use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
bool use_t5 = false; bool use_t5 = false;
for (auto pair : tensor_storage_map) { for (auto pair : tensor_storage_map) {
@ -1236,7 +1207,7 @@ struct T5CLIPEmbedder : public Conditioner {
LOG_WARN("IMPORTANT NOTICE: No text encoders provided, cannot process prompts!"); LOG_WARN("IMPORTANT NOTICE: No text encoders provided, cannot process prompts!");
return; return;
} else { } else {
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer", is_umt5); t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", is_umt5, weight_manager);
} }
} }
@ -1270,12 +1241,6 @@ struct T5CLIPEmbedder : public Conditioner {
} }
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
if (t5) {
t5->set_weight_manager(manager);
}
}
void runner_done() override { void runner_done() override {
if (t5) { if (t5) {
t5->runner_done(); t5->runner_done();
@ -1422,15 +1387,15 @@ struct AnimaConditioner : public Conditioner {
std::shared_ptr<LLM::LLMRunner> llm; std::shared_ptr<LLM::LLMRunner> llm;
AnimaConditioner(ggml_backend_t backend, AnimaConditioner(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map = {},
const String2TensorStorage& tensor_storage_map = {}) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
qwen_tokenizer = std::make_shared<Qwen2Tokenizer>(); qwen_tokenizer = std::make_shared<Qwen2Tokenizer>();
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::QWEN3, llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::QWEN3,
backend, backend,
params_backend,
tensor_storage_map, tensor_storage_map,
"text_encoders.llm", "text_encoders.llm",
false); false,
weight_manager);
} }
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override { void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
@ -1453,10 +1418,6 @@ struct AnimaConditioner : public Conditioner {
llm->set_weight_adapter(adapter); llm->set_weight_adapter(adapter);
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
llm->set_weight_manager(manager);
}
void runner_done() override { void runner_done() override {
llm->runner_done(); llm->runner_done();
} }
@ -1545,11 +1506,11 @@ struct LLMEmbedder : public Conditioner {
std::shared_ptr<LLM::LLMRunner> llm; std::shared_ptr<LLM::LLMRunner> llm;
LLMEmbedder(ggml_backend_t backend, LLMEmbedder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
SDVersion version = VERSION_QWEN_IMAGE, SDVersion version = VERSION_QWEN_IMAGE,
const std::string prefix = "", const std::string prefix = "",
bool enable_vision = false) bool enable_vision = false,
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: version(version) { : version(version) {
LLM::LLMArch arch = LLM::LLMArch::QWEN2_5_VL; LLM::LLMArch arch = LLM::LLMArch::QWEN2_5_VL;
if (version == VERSION_FLUX2) { if (version == VERSION_FLUX2) {
@ -1576,10 +1537,10 @@ struct LLMEmbedder : public Conditioner {
} }
llm = std::make_shared<LLM::LLMRunner>(arch, llm = std::make_shared<LLM::LLMRunner>(arch,
backend, backend,
params_backend,
tensor_storage_map, tensor_storage_map,
"text_encoders.llm", "text_encoders.llm",
enable_vision); enable_vision,
weight_manager);
} }
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override { void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
@ -1604,12 +1565,6 @@ struct LLMEmbedder : public Conditioner {
} }
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
if (llm) {
llm->set_weight_manager(manager);
}
}
void runner_done() override { void runner_done() override {
if (llm) { if (llm) {
llm->runner_done(); llm->runner_done();
@ -2106,10 +2061,10 @@ struct LTXAVTextProjectionRunner : public GGMLRunner {
LTXAVTextProjection model; LTXAVTextProjection model;
LTXAVTextProjectionRunner(ggml_backend_t backend, LTXAVTextProjectionRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string& prefix = "") const std::string& prefix = "",
: GGMLRunner(backend, params_backend), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager),
model(tensor_storage_map.find(prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end()) { model(tensor_storage_map.find(prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end()) {
model.init(params_ctx, tensor_storage_map, prefix); model.init(params_ctx, tensor_storage_map, prefix);
} }
@ -2154,22 +2109,22 @@ struct LTXAVEmbedder : public Conditioner {
bool dual_projection = false; bool dual_projection = false;
LTXAVEmbedder(ggml_backend_t backend, LTXAVEmbedder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string& llm_prefix = "text_encoders.llm", const std::string& llm_prefix = "text_encoders.llm",
const std::string& projector_prefix = "text_embedding_projection") { const std::string& projector_prefix = "text_embedding_projection",
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
tokenizer = std::make_shared<GemmaTokenizer>(); tokenizer = std::make_shared<GemmaTokenizer>();
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::GEMMA3_12B, llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::GEMMA3_12B,
backend, backend,
params_backend,
tensor_storage_map, tensor_storage_map,
llm_prefix, llm_prefix,
false); false,
weight_manager);
dual_projection = tensor_storage_map.find(projector_prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end(); dual_projection = tensor_storage_map.find(projector_prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end();
projector = std::make_shared<LTXAVTextProjectionRunner>(backend, projector = std::make_shared<LTXAVTextProjectionRunner>(backend,
params_backend,
tensor_storage_map, tensor_storage_map,
projector_prefix); projector_prefix,
weight_manager);
} }
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override { void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
@ -2192,11 +2147,6 @@ struct LTXAVEmbedder : public Conditioner {
projector->set_weight_adapter(adapter); projector->set_weight_adapter(adapter);
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
llm->set_weight_manager(manager);
projector->set_weight_manager(manager);
}
void runner_done() override { void runner_done() override {
llm->runner_done(); llm->runner_done();
projector->runner_done(); projector->runner_done();

View File

@ -1696,11 +1696,9 @@ protected:
using GraphCutSegment = sd::ggml_graph_cut::Segment; using GraphCutSegment = sd::ggml_graph_cut::Segment;
using GraphCutPlan = sd::ggml_graph_cut::Plan; using GraphCutPlan = sd::ggml_graph_cut::Plan;
ggml_backend_t params_backend = nullptr;
ggml_backend_t runtime_backend = nullptr; ggml_backend_t runtime_backend = nullptr;
ggml_context* params_ctx = nullptr; ggml_context* params_ctx = nullptr;
ggml_backend_buffer_t params_buffer = nullptr;
ggml_context* cache_ctx = nullptr; ggml_context* cache_ctx = nullptr;
ggml_backend_buffer_t cache_buffer = nullptr; ggml_backend_buffer_t cache_buffer = nullptr;
@ -1880,9 +1878,6 @@ protected:
auto manager = weight_manager.lock(); auto manager = weight_manager.lock();
if (manager == nullptr) { if (manager == nullptr) {
if (!params_to_prepare.empty()) { if (!params_to_prepare.empty()) {
if (params_buffer != nullptr) {
return true;
}
LOG_ERROR("%s weight manager is not set for graph params", get_desc().c_str()); LOG_ERROR("%s weight manager is not set for graph params", get_desc().c_str());
return false; return false;
} }
@ -2194,13 +2189,11 @@ protected:
plan.valid && plan.valid &&
max_graph_vram_bytes > 0 && max_graph_vram_bytes > 0 &&
plan.segments.size() > 1 && plan.segments.size() > 1 &&
params_backend != runtime_backend &&
!sd_backend_is_cpu(runtime_backend); !sd_backend_is_cpu(runtime_backend);
} }
bool can_attempt_graph_cut_segmented_compute() const { bool can_attempt_graph_cut_segmented_compute() const {
return max_graph_vram_bytes > 0 && return max_graph_vram_bytes > 0 &&
params_backend != runtime_backend &&
!sd_backend_is_cpu(runtime_backend); !sd_backend_is_cpu(runtime_backend);
} }
@ -2631,16 +2624,15 @@ public:
public: public:
virtual std::string get_desc() = 0; virtual std::string get_desc() = 0;
GGMLRunner(ggml_backend_t backend, ggml_backend_t params_backend) GGMLRunner(ggml_backend_t backend,
: params_backend(params_backend), std::shared_ptr<RunnerWeightManager> manager = nullptr)
runtime_backend(backend) { : runtime_backend(backend),
weight_manager(manager) {
GGML_ASSERT(runtime_backend != nullptr); GGML_ASSERT(runtime_backend != nullptr);
GGML_ASSERT(params_backend != nullptr);
alloc_params_ctx(); alloc_params_ctx();
} }
virtual ~GGMLRunner() { virtual ~GGMLRunner() {
free_params_buffer();
free_compute_buffer(); free_compute_buffer();
free_params_ctx(); free_params_ctx();
free_compute_ctx(); free_compute_ctx();
@ -2674,73 +2666,6 @@ public:
alloc_compute_ctx(); alloc_compute_ctx();
} }
bool alloc_params_buffer() {
size_t num_tensors = ggml_tensor_num(params_ctx);
if (num_tensors > 0) {
// ggml_backend_alloc_ctx_tensors fails when all tensors are already allocated
// (typical for memory-mapped weights). See ggml-alloc.c n_buffers==0 branch.
bool all_have_data = true;
for (ggml_tensor* t = ggml_get_first_tensor(params_ctx); t != nullptr; t = ggml_get_next_tensor(params_ctx, t)) {
if (t->data == nullptr) {
all_have_data = false;
break;
}
}
if (all_have_data) {
LOG_DEBUG("%s all params already mmap-allocated (no separate buffer needed)", get_desc().c_str());
params_buffer = nullptr;
rebuild_params_tensor_set();
return true;
}
} else {
LOG_DEBUG("%s skipping params allocation (no tensors)", get_desc().c_str());
return true;
}
// Pinned host buffer when CPU-offloaded for DMA-direct H2D.
ggml_backend_buffer_type_t params_buft = nullptr;
if (params_backend != runtime_backend) {
ggml_backend_dev_t runtime_dev = ggml_backend_get_device(runtime_backend);
if (runtime_dev != nullptr) {
params_buft = ggml_backend_dev_host_buffer_type(runtime_dev);
}
}
if (params_buft == nullptr) {
params_buft = ggml_backend_get_default_buffer_type(params_backend);
}
params_buffer = ggml_backend_alloc_ctx_tensors_from_buft(params_ctx, params_buft);
if (params_buffer == nullptr) {
LOG_ERROR("%s alloc params backend buffer failed, num_tensors = %i",
get_desc().c_str(),
num_tensors);
return false;
}
rebuild_params_tensor_set();
ggml_backend_buffer_set_usage(params_buffer, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
size_t params_buffer_size = ggml_backend_buffer_get_size(params_buffer);
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
get_desc().c_str(),
params_buffer_size / (1024.f * 1024.f),
sd_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
num_tensors);
return true;
}
protected:
void free_params_buffer() {
if (params_buffer != nullptr) {
ggml_backend_buffer_free(params_buffer);
params_buffer = nullptr;
}
observed_max_effective_budget_ = 0;
}
size_t get_params_buffer_size() {
if (params_buffer != nullptr) {
return ggml_backend_buffer_get_size(params_buffer);
}
return 0;
}
public: public:
void free_cache_ctx_and_buffer() { void free_cache_ctx_and_buffer() {
free_cache_buffer(); free_cache_buffer();
@ -2886,15 +2811,6 @@ public:
weight_adapter = adapter; weight_adapter = adapter;
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) {
weight_manager = manager;
}
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager,
const std::string&) {
set_weight_manager(manager);
}
void set_max_graph_vram_bytes(size_t max_vram_bytes) { void set_max_graph_vram_bytes(size_t max_vram_bytes) {
max_graph_vram_bytes = max_vram_bytes; max_graph_vram_bytes = max_vram_bytes;
} }
@ -2902,14 +2818,6 @@ public:
void set_stream_layers_enabled(bool enabled) { void set_stream_layers_enabled(bool enabled) {
stream_layers_enabled = enabled; stream_layers_enabled = enabled;
} }
ggml_backend_t get_runtime_backend() {
return runtime_backend;
}
ggml_backend_t get_params_backend() {
return params_backend;
}
}; };
class GGMLBlock { class GGMLBlock {

View File

@ -19,6 +19,7 @@ struct GenerationExtensionInitContext {
SDVersion version; SDVersion version;
const String2TensorStorage& tensor_storage_map; const String2TensorStorage& tensor_storage_map;
ModelLoader& model_loader; ModelLoader& model_loader;
std::shared_ptr<ModelManager> model_manager;
int n_threads; int n_threads;
std::function<bool(SDBackendModule)> ensure_backend_pair; std::function<bool(SDBackendModule)> ensure_backend_pair;
std::function<ggml_backend_t(SDBackendModule)> backend_for; std::function<ggml_backend_t(SDBackendModule)> backend_for;
@ -46,7 +47,6 @@ struct GenerationExtension {
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>&) {} virtual void get_param_tensors(std::map<std::string, ggml_tensor*>&) {}
virtual void collect_loras(std::vector<ModelManager::LoraSpec>&) {} virtual void collect_loras(std::vector<ModelManager::LoraSpec>&) {}
virtual void add_ignore_tensors(std::set<std::string>&) const {} virtual void add_ignore_tensors(std::set<std::string>&) const {}
virtual void set_weight_manager(const std::shared_ptr<RunnerWeightManager>&) {}
virtual void runner_done() {} virtual void runner_done() {}
virtual void reset_runtime_condition() {} virtual void reset_runtime_condition() {}
virtual bool prepare_condition(GenerationExtensionConditionContext&) { virtual bool prepare_condition(GenerationExtensionConditionContext&) {

View File

@ -134,11 +134,12 @@ struct PhotoMakerExtension : public GenerationExtension {
} }
pmid_model = std::make_shared<PhotoMakerIDEncoder>(ctx.backend_for(SDBackendModule::PHOTOMAKER), pmid_model = std::make_shared<PhotoMakerIDEncoder>(ctx.backend_for(SDBackendModule::PHOTOMAKER),
ctx.params_backend_for(SDBackendModule::PHOTOMAKER),
ctx.tensor_storage_map, ctx.tensor_storage_map,
"pmid", "pmid",
ctx.version, ctx.version,
pm_version); pm_version,
20.f,
ctx.model_manager);
if (pm_version == PM_VERSION_2) { if (pm_version == PM_VERSION_2) {
LOG_INFO("using PhotoMaker Version 2"); LOG_INFO("using PhotoMaker Version 2");
} }
@ -174,12 +175,6 @@ struct PhotoMakerExtension : public GenerationExtension {
ignore_tensors.insert("pmid.unet."); ignore_tensors.insert("pmid.unet.");
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
if (pmid_model != nullptr) {
pmid_model->set_weight_manager(manager);
}
}
void runner_done() override { void runner_done() override {
if (pmid_model != nullptr) { if (pmid_model != nullptr) {
pmid_model->runner_done(); pmid_model->runner_done();

View File

@ -4,6 +4,7 @@
#include <mutex> #include <mutex>
#include "core/ggml_extend.hpp" #include "core/ggml_extend.hpp"
#include "model_loader.h" #include "model_loader.h"
#include "model_manager.h"
#define LORA_GRAPH_BASE_SIZE 10240 #define LORA_GRAPH_BASE_SIZE 10240
@ -14,7 +15,8 @@ struct LoraModel : public GGMLRunner {
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor; std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
std::set<std::string> applied_lora_tensors; std::set<std::string> applied_lora_tensors;
std::string file_path; std::string file_path;
ModelLoader model_loader; std::shared_ptr<ModelManager> model_manager;
ggml_backend_t params_backend = nullptr;
bool load_failed = false; bool load_failed = false;
bool applied = false; bool applied = false;
bool tensor_preprocessed = false; bool tensor_preprocessed = false;
@ -23,13 +25,14 @@ struct LoraModel : public GGMLRunner {
LoraModel(const std::string& lora_id, LoraModel(const std::string& lora_id,
ggml_backend_t backend, ggml_backend_t backend,
ggml_backend_t params_backend, ggml_backend_t params_backend_,
const std::string& file_path = "", const std::string& file_path = "",
std::string prefix = "", std::string prefix = "",
SDVersion version = VERSION_COUNT) SDVersion version = VERSION_COUNT,
: lora_id(lora_id), file_path(file_path), GGMLRunner(backend, params_backend) { std::shared_ptr<ModelManager> manager = std::make_shared<ModelManager>())
: GGMLRunner(backend, manager), lora_id(lora_id), file_path(file_path), model_manager(std::move(manager)), params_backend(params_backend_) {
prefix = "lora." + prefix; prefix = "lora." + prefix;
if (!model_loader.init_from_file_and_convert_name(file_path, prefix, version)) { if (model_manager == nullptr || !model_manager->loader().init_from_file_and_convert_name(file_path, prefix, version)) {
load_failed = true; load_failed = true;
} }
} }
@ -71,7 +74,10 @@ struct LoraModel : public GGMLRunner {
return true; return true;
}; };
model_loader.set_n_threads(n_threads); if (model_manager != nullptr) {
model_manager->set_n_threads(n_threads);
}
ModelLoader& model_loader = model_manager->loader();
model_loader.load_tensors(on_new_tensor_cb); model_loader.load_tensors(on_new_tensor_cb);
if (tensors_to_create.empty()) { if (tensors_to_create.empty()) {
@ -88,23 +94,42 @@ struct LoraModel : public GGMLRunner {
lora_tensors[name] = real; lora_tensors[name] = real;
} }
if (!alloc_params_buffer()) { std::map<std::string, ggml_tensor*> tensors;
LOG_ERROR("lora model buffer allocation failed"); for (const auto& pair : lora_tensors) {
tensors[pair.first] = pair.second;
}
if (model_manager == nullptr ||
!model_manager->register_param_tensors("LoRA",
std::move(tensors),
ModelManager::ResidencyMode::Resident,
runtime_backend,
params_backend) ||
!model_manager->validate_registered_tensors()) {
LOG_ERROR("lora model manager registration failed");
return false;
}
std::vector<ggml_tensor*> lora_params;
lora_params.reserve(lora_tensors.size());
for (const auto& pair : lora_tensors) {
lora_params.push_back(pair.second);
}
if (!model_manager->prepare_params(lora_params)) {
LOG_ERROR("lora model manager prepare params failed");
return false; return false;
} }
dry_run = false;
model_loader.load_tensors(on_new_tensor_cb);
LOG_DEBUG("finished loaded lora"); LOG_DEBUG("finished loaded lora");
return true; return true;
} }
void release_loaded_tensors() { void release_loaded_tensors() {
runner_done();
free_compute_buffer(); free_compute_buffer();
free_params_buffer(); model_manager.reset();
free_params_ctx(); free_params_ctx();
alloc_params_ctx(); alloc_params_ctx();
model_manager = std::make_shared<ModelManager>();
weight_manager = model_manager;
lora_tensors.clear(); lora_tensors.clear();
original_tensor_to_final_tensor.clear(); original_tensor_to_final_tensor.clear();
applied_lora_tensors.clear(); applied_lora_tensors.clear();

View File

@ -413,13 +413,13 @@ public:
public: public:
PhotoMakerIDEncoder(ggml_backend_t backend, PhotoMakerIDEncoder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
SDVersion version = VERSION_SDXL, SDVersion version = VERSION_SDXL,
PMVersion pm_v = PM_VERSION_1, PMVersion pm_v = PM_VERSION_1,
float sty = 20.f) float sty = 20.f,
: GGMLRunner(backend, params_backend), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager),
version(version), version(version),
pm_version(pm_v), pm_version(pm_v),
style_strength(sty) { style_strength(sty) {
@ -565,17 +565,18 @@ public:
struct PhotoMakerIDEmbed : public GGMLRunner { struct PhotoMakerIDEmbed : public GGMLRunner {
std::map<std::string, ggml_tensor*> tensors; std::map<std::string, ggml_tensor*> tensors;
std::string file_path; std::string file_path;
ModelLoader* model_loader; std::shared_ptr<ModelManager> model_manager;
ggml_backend_t params_backend = nullptr;
bool load_failed = false; bool load_failed = false;
bool applied = false; bool applied = false;
PhotoMakerIDEmbed(ggml_backend_t backend, PhotoMakerIDEmbed(ggml_backend_t backend,
ggml_backend_t params_backend, ggml_backend_t params_backend_,
ModelLoader* ml, std::shared_ptr<ModelManager> manager = std::make_shared<ModelManager>(),
const std::string& file_path = "", const std::string& file_path = "",
const std::string& prefix = "") const std::string& prefix = "")
: file_path(file_path), GGMLRunner(backend, params_backend), model_loader(ml) { : GGMLRunner(backend, manager), file_path(file_path), model_manager(std::move(manager)), params_backend(params_backend_) {
if (!model_loader->init_from_file_and_convert_name(file_path, prefix)) { if (model_manager == nullptr || !model_manager->loader().init_from_file_and_convert_name(file_path, prefix)) {
load_failed = true; load_failed = true;
} }
} }
@ -616,15 +617,27 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
return true; return true;
}; };
model_loader->set_n_threads(n_threads); model_manager->set_n_threads(n_threads);
model_loader->load_tensors(on_new_tensor_cb); ModelLoader& model_loader = model_manager->loader();
if (!alloc_params_buffer()) { model_loader.load_tensors(on_new_tensor_cb);
LOG_ERROR("PhotoMaker ID embeds buffer allocation failed"); if (!model_manager->register_param_tensors("PhotoMaker ID embeds",
tensors,
ModelManager::ResidencyMode::Resident,
runtime_backend,
params_backend) ||
!model_manager->validate_registered_tensors()) {
LOG_ERROR("PhotoMaker ID embeds model manager registration failed");
return false;
}
std::vector<ggml_tensor*> id_embed_params;
id_embed_params.reserve(tensors.size());
for (const auto& pair : tensors) {
id_embed_params.push_back(pair.second);
}
if (!model_manager->prepare_params(id_embed_params)) {
LOG_ERROR("PhotoMaker ID embeds model manager prepare params failed");
return false; return false;
} }
dry_run = false;
model_loader->load_tensors(on_new_tensor_cb);
LOG_DEBUG("finished loading PhotoMaker ID Embeds "); LOG_DEBUG("finished loading PhotoMaker ID Embeds ");
return true; return true;

View File

@ -560,11 +560,11 @@ protected:
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1); params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
} }
float get_alpha() { ggml_tensor* get_alpha(GGMLRunnerContext* ctx) {
// image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,] // image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,]
// so learned_with_images is same as learned // so learned_with_images is same as learned
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]); auto mix_factor = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["mix_factor"]);
return sigmoid(alpha); return ggml_sigmoid(ctx->ggml_ctx, mix_factor);
} }
public: public:
@ -578,11 +578,12 @@ public:
ggml_tensor* x_spatial, ggml_tensor* x_spatial,
ggml_tensor* x_temporal) { ggml_tensor* x_temporal) {
// image_only_indicator is always tensor([0.]) // image_only_indicator is always tensor([0.])
float alpha = get_alpha(); auto alpha = get_alpha(ctx);
auto x = ggml_add(ctx->ggml_ctx, return ggml_add(ctx->ggml_ctx,
ggml_ext_scale(ctx->ggml_ctx, x_spatial, alpha), x_temporal,
ggml_ext_scale(ctx->ggml_ctx, x_temporal, 1.0f - alpha)); ggml_mul(ctx->ggml_ctx,
return x; ggml_sub(ctx->ggml_ctx, x_spatial, x_temporal),
alpha));
} }
}; };

View File

@ -561,10 +561,10 @@ namespace Anima {
AnimaNet net; AnimaNet net;
AnimaRunner(ggml_backend_t backend, AnimaRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "model.diffusion_model") const std::string prefix = "model.diffusion_model",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(AnimaConfig::detect_from_weights(tensor_storage_map, prefix + ".net")) { config(AnimaConfig::detect_from_weights(tensor_storage_map, prefix + ".net")) {
net = AnimaNet(config); net = AnimaNet(config);
net.init(params_ctx, tensor_storage_map, prefix + ".net"); net.init(params_ctx, tensor_storage_map, prefix + ".net");

View File

@ -1,8 +1,9 @@
#ifndef __SD_MODEL_DIFFUSION_CONTROL_HPP__ #ifndef __SD_MODEL_DIFFUSION_CONTROL_HPP__
#define __SD_MODEL_DIFFUSION_CONTROL_HPP__ #define __SD_MODEL_DIFFUSION_CONTROL_HPP__
#include "model/common/block.hpp" #include "model/common/block.hpp"
#include "model_loader.h" #include "model_loader.h"
#include "model_manager.h"
#define CONTROL_NET_GRAPH_SIZE 1536 #define CONTROL_NET_GRAPH_SIZE 1536
@ -318,13 +319,16 @@ struct ControlNet : public GGMLRunner {
std::vector<sd::Tensor<float>> controls; std::vector<sd::Tensor<float>> controls;
sd::Tensor<float> guided_hint; sd::Tensor<float> guided_hint;
bool guided_hint_cached = false; bool guided_hint_cached = false;
std::shared_ptr<ModelManager> owned_model_manager;
ggml_backend_t params_backend = nullptr;
ControlNet(ggml_backend_t backend, ControlNet(ggml_backend_t backend,
ggml_backend_t params_backend, ggml_backend_t params_backend_,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
SDVersion version = VERSION_SD1, SDVersion version = VERSION_SD1,
const std::string& prefix = "") const std::string& prefix = "",
: GGMLRunner(backend, params_backend), version(version), control_net(version), weight_prefix(prefix) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager), version(version), control_net(version), weight_prefix(prefix), params_backend(params_backend_) {
control_net.init(params_ctx, tensor_storage_map, prefix); control_net.init(params_ctx, tensor_storage_map, prefix);
} }
@ -459,31 +463,35 @@ struct ControlNet : public GGMLRunner {
bool load_from_file(const std::string& file_path, int n_threads) { bool load_from_file(const std::string& file_path, int n_threads) {
LOG_INFO("loading control net from '%s'", file_path.c_str()); LOG_INFO("loading control net from '%s'", file_path.c_str());
if (!alloc_params_buffer()) {
LOG_ERROR("control net model buffer allocation failed");
return false;
}
std::map<std::string, ggml_tensor*> tensors; std::map<std::string, ggml_tensor*> tensors;
control_net.get_param_tensors(tensors); control_net.get_param_tensors(tensors);
std::set<std::string> ignore_tensors;
ModelLoader model_loader; auto manager = std::dynamic_pointer_cast<ModelManager>(weight_manager.lock());
if (manager == nullptr) {
owned_model_manager = std::make_shared<ModelManager>();
weight_manager = owned_model_manager;
manager = owned_model_manager;
}
ModelLoader& model_loader = manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path)) { if (!model_loader.init_from_file_and_convert_name(file_path)) {
LOG_ERROR("init control net model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init control net model loader from file failed: '%s'", file_path.c_str());
return false; return false;
} }
model_loader.set_n_threads(n_threads); manager->set_n_threads(n_threads);
bool success = model_loader.load_tensors(tensors, ignore_tensors); if (!manager->register_param_tensors("ControlNet",
std::move(tensors),
if (!success) { ModelManager::ResidencyMode::Resident,
LOG_ERROR("load control net tensors from model loader failed"); runtime_backend,
params_backend) ||
!manager->validate_registered_tensors()) {
LOG_ERROR("register control net tensors with model manager failed");
return false; return false;
} }
LOG_INFO("control net model loaded"); LOG_INFO("control net model loaded");
return success; return true;
} }
}; };

View File

@ -387,10 +387,10 @@ namespace ErnieImage {
std::vector<float> pe_vec; std::vector<float> pe_vec;
ErnieImageRunner(ggml_backend_t backend, ErnieImageRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "") const std::string prefix = "",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(ErnieImageConfig::detect_from_weights(tensor_storage_map, prefix)) { config(ErnieImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
ernie_image = ErnieImageModel(config); ernie_image = ErnieImageModel(config);
ernie_image.init(params_ctx, tensor_storage_map, prefix); ernie_image.init(params_ctx, tensor_storage_map, prefix);

View File

@ -1301,12 +1301,12 @@ namespace Flux {
bool use_mask = false; bool use_mask = false;
FluxRunner(ggml_backend_t backend, FluxRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
SDVersion version = VERSION_FLUX, SDVersion version = VERSION_FLUX,
bool use_mask = false) bool use_mask = false,
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(FluxConfig::detect_from_weights(tensor_storage_map, prefix, version)), config(FluxConfig::detect_from_weights(tensor_storage_map, prefix, version)),
version(version), version(version),
use_mask(use_mask) { use_mask(use_mask) {
@ -1583,7 +1583,8 @@ namespace Flux {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_COUNT; ggml_type model_data_type = GGML_TYPE_COUNT;
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) { if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
@ -1599,24 +1600,20 @@ namespace Flux {
} }
std::shared_ptr<FluxRunner> flux = std::make_shared<FluxRunner>(backend, std::shared_ptr<FluxRunner> flux = std::make_shared<FluxRunner>(backend,
backend,
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
VERSION_FLUX2, VERSION_FLUX2,
false); false,
model_manager);
if (!flux->alloc_params_buffer()) { if (!model_manager->register_runner_params("Flux test",
LOG_ERROR("flux model allocation failed"); *flux,
return; "model.diffusion_model",
} ModelManager::ResidencyMode::Resident,
backend,
std::map<std::string, ggml_tensor*> tensors; backend) ||
flux->get_param_tensors(tensors, "model.diffusion_model"); !model_manager->validate_registered_tensors()) {
LOG_ERROR("register flux tensors with model manager failed");
bool success = model_loader.load_tensors(tensors);
if (!success) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__ #ifndef __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
#define __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__ #define __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
#include <algorithm> #include <algorithm>
@ -282,10 +282,10 @@ namespace HiDreamO1 {
std::array<std::vector<float>, 4> pos_embed_weight_data_; std::array<std::vector<float>, 4> pos_embed_weight_data_;
HiDreamO1VisionRunner(ggml_backend_t backend, HiDreamO1VisionRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string& prefix = "model.visual") const std::string& prefix = "model.visual",
: GGMLRunner(backend, params_backend), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager),
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)), config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)),
model(std::make_shared<LLM::VisionModel>(false, config.llm.vision)) { model(std::make_shared<LLM::VisionModel>(false, config.llm.vision)) {
model->init(params_ctx, tensor_storage_map, prefix); model->init(params_ctx, tensor_storage_map, prefix);
@ -343,10 +343,10 @@ namespace HiDreamO1 {
std::vector<float> attention_mask_vec; std::vector<float> attention_mask_vec;
HiDreamO1Runner(ggml_backend_t backend, HiDreamO1Runner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string& prefix = "model") const std::string& prefix = "model",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)) { config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)) {
model = HiDreamO1Model(config); model = HiDreamO1Model(config);
model.init(params_ctx, tensor_storage_map, prefix); model.init(params_ctx, tensor_storage_map, prefix);
@ -490,9 +490,9 @@ namespace HiDreamO1 {
std::shared_ptr<HiDreamO1VisionRunner> vision_runner; std::shared_ptr<HiDreamO1VisionRunner> vision_runner;
HiDreamO1Conditioner(ggml_backend_t backend, HiDreamO1Conditioner(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map = {},
const String2TensorStorage& tensor_storage_map = {}) std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: vision_runner(std::make_shared<HiDreamO1VisionRunner>(backend, params_backend, tensor_storage_map)) {} : vision_runner(std::make_shared<HiDreamO1VisionRunner>(backend, tensor_storage_map, "model.visual", weight_manager)) {}
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override { void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
vision_runner->get_param_tensors(tensors); vision_runner->get_param_tensors(tensors);
@ -510,10 +510,6 @@ namespace HiDreamO1 {
vision_runner->set_weight_adapter(adapter); vision_runner->set_weight_adapter(adapter);
} }
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
vision_runner->set_weight_manager(manager);
}
void runner_done() override { void runner_done() override {
vision_runner->runner_done(); vision_runner->runner_done();
} }

View File

@ -449,10 +449,10 @@ namespace Ideogram4 {
std::vector<int32_t> image_indicator_vec; std::vector<int32_t> image_indicator_vec;
Ideogram4Runner(ggml_backend_t backend, Ideogram4Runner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "") const std::string prefix = "",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(Ideogram4Config::detect_from_weights(tensor_storage_map, prefix)), config(Ideogram4Config::detect_from_weights(tensor_storage_map, prefix)),
uncond_prefix(prefix + ".uncond") { uncond_prefix(prefix + ".uncond") {
model = Ideogram4Transformer(config); model = Ideogram4Transformer(config);

View File

@ -356,10 +356,10 @@ namespace Lens {
std::vector<float> pe_vec; std::vector<float> pe_vec;
LensRunner(ggml_backend_t backend, LensRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "") const std::string prefix = "",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(LensConfig::detect_from_weights(tensor_storage_map, prefix)) { config(LensConfig::detect_from_weights(tensor_storage_map, prefix)) {
lens = LensModel(config); lens = LensModel(config);
lens.init(params_ctx, tensor_storage_map, prefix); lens.init(params_ctx, tensor_storage_map, prefix);

View File

@ -1686,10 +1686,10 @@ namespace LTXV {
sd::Tensor<float> ax_input_cache; sd::Tensor<float> ax_input_cache;
LTXAVRunner(ggml_backend_t backend, LTXAVRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string& prefix = "model.diffusion_model") const std::string& prefix = "model.diffusion_model",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(LTXAVConfig::detect_from_weights(tensor_storage_map, prefix)), config(LTXAVConfig::detect_from_weights(tensor_storage_map, prefix)),
model(config) { model(config) {
model.init(params_ctx, tensor_storage_map, prefix); model.init(params_ctx, tensor_storage_map, prefix);
@ -2025,7 +2025,8 @@ namespace LTXV {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
LOG_INFO("loading ltxav from '%s'", model_path.c_str()); LOG_INFO("loading ltxav from '%s'", model_path.c_str());
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(model_path, "model.diffusion_model.")) { if (!model_loader.init_from_file_and_convert_name(model_path, "model.diffusion_model.")) {
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
return; return;
@ -2040,19 +2041,18 @@ namespace LTXV {
auto& tensor_storage_map = model_loader.get_tensor_storage_map(); auto& tensor_storage_map = model_loader.get_tensor_storage_map();
std::shared_ptr<LTXAVRunner> ltxav = std::make_shared<LTXAVRunner>(backend, std::shared_ptr<LTXAVRunner> ltxav = std::make_shared<LTXAVRunner>(backend,
backend,
tensor_storage_map, tensor_storage_map,
"model.diffusion_model"); "model.diffusion_model",
model_manager);
if (!ltxav->alloc_params_buffer()) { if (!model_manager->register_runner_params("LTXAV test",
LOG_ERROR("ltxav buffer allocation failed"); *ltxav,
return; "model.diffusion_model",
} ModelManager::ResidencyMode::Resident,
std::map<std::string, ggml_tensor*> tensors; backend,
ltxav->get_param_tensors(tensors, "model.diffusion_model"); backend) ||
!model_manager->validate_registered_tensors()) {
if (!model_loader.load_tensors(tensors)) { LOG_ERROR("register ltxav tensors with model manager failed");
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -879,10 +879,10 @@ struct MMDiTRunner : public DiffusionModelRunner {
MMDiT mmdit; MMDiT mmdit;
MMDiTRunner(ggml_backend_t backend, MMDiTRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "") const std::string prefix = "",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(MMDiTConfig::detect_from_weights(tensor_storage_map, prefix)), config(MMDiTConfig::detect_from_weights(tensor_storage_map, prefix)),
mmdit(config) { mmdit(config) {
mmdit.init(params_ctx, tensor_storage_map, prefix); mmdit.init(params_ctx, tensor_storage_map, prefix);
@ -1001,28 +1001,25 @@ struct MMDiTRunner : public DiffusionModelRunner {
// ggml_backend_t backend = ggml_backend_cuda_init(0); // ggml_backend_t backend = ggml_backend_cuda_init(0);
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_F16; ggml_type model_data_type = GGML_TYPE_F16;
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, backend); auto model_manager = std::make_shared<ModelManager>();
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, String2TensorStorage{}, "", model_manager);
{ {
LOG_INFO("loading from '%s'", file_path.c_str()); LOG_INFO("loading from '%s'", file_path.c_str());
if (!mmdit->alloc_params_buffer()) { ModelLoader& model_loader = model_manager->loader();
LOG_ERROR("mmdit embeds buffer allocation failed");
return;
}
std::map<std::string, ggml_tensor*> tensors;
mmdit->get_param_tensors(tensors, "model.diffusion_model");
ModelLoader model_loader;
if (!model_loader.init_from_file_and_convert_name(file_path)) { if (!model_loader.init_from_file_and_convert_name(file_path)) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
} }
bool success = model_loader.load_tensors(tensors); if (!model_manager->register_runner_params("MMDiT test",
*mmdit,
if (!success) { "model.diffusion_model",
LOG_ERROR("load tensors from model loader failed"); ModelManager::ResidencyMode::Resident,
backend,
backend) ||
!model_manager->validate_registered_tensors()) {
LOG_ERROR("register mmdit tensors with model manager failed");
return; return;
} }

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_DIFFUSION_MODEL_HPP__ #ifndef __SD_MODEL_DIFFUSION_MODEL_HPP__
#define __SD_MODEL_DIFFUSION_MODEL_HPP__ #define __SD_MODEL_DIFFUSION_MODEL_HPP__
#include <string> #include <string>
@ -7,6 +7,7 @@
#include "core/ggml_extend.hpp" #include "core/ggml_extend.hpp"
#include "core/tensor_ggml.hpp" #include "core/tensor_ggml.hpp"
#include "model_manager.h"
struct UNetDiffusionExtra { struct UNetDiffusionExtra {
int num_video_frames = -1; int num_video_frames = -1;
@ -88,9 +89,9 @@ protected:
public: public:
DiffusionModelRunner(ggml_backend_t backend, DiffusionModelRunner(ggml_backend_t backend,
ggml_backend_t params_backend, const std::string& prefix,
const std::string& prefix) std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, params_backend), : GGMLRunner(backend, weight_manager),
prefix(prefix) {} prefix(prefix) {}
virtual sd::Tensor<float> compute(int n_threads, virtual sd::Tensor<float> compute(int n_threads,

View File

@ -710,10 +710,10 @@ namespace Pid {
std::vector<float> pixel_pos_comp_vec; std::vector<float> pixel_pos_comp_vec;
PiDRunner(ggml_backend_t backend, PiDRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix = "model.diffusion_model") const std::string prefix = "model.diffusion_model",
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(PixelDiTConfig::detect_from_weights(tensor_storage_map, prefix)) { config(PixelDiTConfig::detect_from_weights(tensor_storage_map, prefix)) {
model = PixelDiT(config); model = PixelDiT(config);
model.init(params_ctx, tensor_storage_map, prefix); model.init(params_ctx, tensor_storage_map, prefix);

View File

@ -518,12 +518,12 @@ namespace Qwen {
SDVersion version; SDVersion version;
QwenImageRunner(ggml_backend_t backend, QwenImageRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
SDVersion version = VERSION_QWEN_IMAGE, SDVersion version = VERSION_QWEN_IMAGE,
bool zero_cond_t = false) bool zero_cond_t = false,
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(QwenImageConfig::detect_from_weights(tensor_storage_map, prefix)) { config(QwenImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
config.zero_cond_t = config.zero_cond_t || zero_cond_t; config.zero_cond_t = config.zero_cond_t || zero_cond_t;
qwen_image = QwenImageModel(config); qwen_image = QwenImageModel(config);
@ -691,7 +691,8 @@ namespace Qwen {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_Q8_0; ggml_type model_data_type = GGML_TYPE_Q8_0;
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) { if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
@ -705,23 +706,20 @@ namespace Qwen {
} }
std::shared_ptr<QwenImageRunner> qwen_image = std::make_shared<QwenImageRunner>(backend, std::shared_ptr<QwenImageRunner> qwen_image = std::make_shared<QwenImageRunner>(backend,
backend,
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
VERSION_QWEN_IMAGE); VERSION_QWEN_IMAGE,
false,
model_manager);
if (!qwen_image->alloc_params_buffer()) { if (!model_manager->register_runner_params("Qwen image test",
LOG_ERROR("qwen_image buffer allocation failed"); *qwen_image,
return; "model.diffusion_model",
} ModelManager::ResidencyMode::Resident,
backend,
std::map<std::string, ggml_tensor*> tensors; backend) ||
qwen_image->get_param_tensors(tensors, "model.diffusion_model"); !model_manager->validate_registered_tensors()) {
LOG_ERROR("register qwen_image tensors with model manager failed");
bool success = model_loader.load_tensors(tensors);
if (!success) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -694,11 +694,11 @@ struct UNetModelRunner : public DiffusionModelRunner {
UnetModelBlock unet; UnetModelBlock unet;
UNetModelRunner(ggml_backend_t backend, UNetModelRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
SDVersion version = VERSION_SD1) SDVersion version = VERSION_SD1,
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(UNetConfig::detect_from_weights(tensor_storage_map, prefix, version)), config(UNetConfig::detect_from_weights(tensor_storage_map, prefix, version)),
unet(config) { unet(config) {
unet.init(params_ctx, tensor_storage_map, prefix); unet.init(params_ctx, tensor_storage_map, prefix);

View File

@ -799,11 +799,11 @@ namespace WAN {
SDVersion version; SDVersion version;
WanRunner(ggml_backend_t backend, WanRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
SDVersion version = VERSION_WAN2) SDVersion version = VERSION_WAN2,
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(WanConfig::detect_from_weights(tensor_storage_map, prefix)) { config(WanConfig::detect_from_weights(tensor_storage_map, prefix)) {
if (config.num_layers == 30) { if (config.num_layers == 30) {
if (version == VERSION_WAN2_2_TI2V) { if (version == VERSION_WAN2_2_TI2V) {
@ -1017,7 +1017,8 @@ namespace WAN {
ggml_type model_data_type = GGML_TYPE_F16; ggml_type model_data_type = GGML_TYPE_F16;
LOG_INFO("loading from '%s'", file_path.c_str()); LOG_INFO("loading from '%s'", file_path.c_str());
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) { if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
@ -1031,23 +1032,19 @@ namespace WAN {
} }
std::shared_ptr<WanRunner> wan = std::make_shared<WanRunner>(backend, std::shared_ptr<WanRunner> wan = std::make_shared<WanRunner>(backend,
backend,
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
VERSION_WAN2_2_TI2V); VERSION_WAN2_2_TI2V,
model_manager);
if (!wan->alloc_params_buffer()) { if (!model_manager->register_runner_params("Wan test",
LOG_ERROR("wan buffer allocation failed"); *wan,
return; "model.diffusion_model",
} ModelManager::ResidencyMode::Resident,
backend,
std::map<std::string, ggml_tensor*> tensors; backend) ||
wan->get_param_tensors(tensors, "model.diffusion_model"); !model_manager->validate_registered_tensors()) {
LOG_ERROR("register wan tensors with model manager failed");
bool success = model_loader.load_tensors(tensors);
if (!success) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -553,11 +553,11 @@ namespace ZImage {
SDVersion version; SDVersion version;
ZImageRunner(ggml_backend_t backend, ZImageRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
SDVersion version = VERSION_Z_IMAGE) SDVersion version = VERSION_Z_IMAGE,
: DiffusionModelRunner(backend, params_backend, prefix), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: DiffusionModelRunner(backend, prefix, weight_manager),
config(ZImageConfig::detect_from_weights(tensor_storage_map, prefix)) { config(ZImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
z_image = ZImageModel(config); z_image = ZImageModel(config);
z_image.init(params_ctx, tensor_storage_map, prefix); z_image.init(params_ctx, tensor_storage_map, prefix);
@ -698,7 +698,8 @@ namespace ZImage {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_Q8_0; ggml_type model_data_type = GGML_TYPE_Q8_0;
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) { if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
@ -714,22 +715,19 @@ namespace ZImage {
} }
std::shared_ptr<ZImageRunner> z_image = std::make_shared<ZImageRunner>(backend, std::shared_ptr<ZImageRunner> z_image = std::make_shared<ZImageRunner>(backend,
backend,
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
VERSION_QWEN_IMAGE); VERSION_QWEN_IMAGE,
model_manager);
if (!z_image->alloc_params_buffer()) { if (!model_manager->register_runner_params("ZImage test",
LOG_ERROR("z_image buffer allocation failed"); *z_image,
return; "model.diffusion_model",
} ModelManager::ResidencyMode::Resident,
std::map<std::string, ggml_tensor*> tensors; backend,
z_image->get_param_tensors(tensors, "model.diffusion_model"); backend) ||
!model_manager->validate_registered_tensors()) {
bool success = model_loader.load_tensors(tensors); LOG_ERROR("register z_image tensors with model manager failed");
if (!success) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_TE_CLIP_HPP__ #ifndef __SD_MODEL_TE_CLIP_HPP__
#define __SD_MODEL_TE_CLIP_HPP__ #define __SD_MODEL_TE_CLIP_HPP__
#include "core/ggml_extend.hpp" #include "core/ggml_extend.hpp"
@ -469,13 +469,13 @@ struct CLIPTextModelRunner : public GGMLRunner {
std::vector<float> attention_mask_vec; std::vector<float> attention_mask_vec;
CLIPTextModelRunner(ggml_backend_t backend, CLIPTextModelRunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
CLIPVersion version = OPENAI_CLIP_VIT_L_14, CLIPVersion version = OPENAI_CLIP_VIT_L_14,
bool with_final_ln = true, bool with_final_ln = true,
bool force_clip_f32 = false) bool force_clip_f32 = false,
: GGMLRunner(backend, params_backend) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager) {
bool proj_in = false; bool proj_in = false;
for (const auto& [name, tensor_storage] : tensor_storage_map) { for (const auto& [name, tensor_storage] : tensor_storage_map) {
if (!starts_with(name, prefix)) { if (!starts_with(name, prefix)) {

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_TE_LLM_HPP__ #ifndef __SD_MODEL_TE_LLM_HPP__
#define __SD_MODEL_TE_LLM_HPP__ #define __SD_MODEL_TE_LLM_HPP__
#include <algorithm> #include <algorithm>
@ -22,6 +22,7 @@
#include "json.hpp" #include "json.hpp"
#include "model/common/rope.hpp" #include "model/common/rope.hpp"
#include "model_loader.h" #include "model_loader.h"
#include "model_manager.h"
#include "tokenizers/bpe_tokenizer.h" #include "tokenizers/bpe_tokenizer.h"
#include "tokenizers/gemma_tokenizer.h" #include "tokenizers/gemma_tokenizer.h"
#include "tokenizers/gpt_oss_tokenizer.h" #include "tokenizers/gpt_oss_tokenizer.h"
@ -1571,11 +1572,11 @@ namespace LLM {
public: public:
LLMRunner(LLMArch arch, LLMRunner(LLMArch arch,
ggml_backend_t backend, ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
bool enable_vision_ = false) bool enable_vision_ = false,
: GGMLRunner(backend, params_backend), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager),
config(LLMConfig::detect_from_weights(tensor_storage_map, prefix, arch)), config(LLMConfig::detect_from_weights(tensor_storage_map, prefix, arch)),
enable_vision(enable_vision_) { enable_vision(enable_vision_) {
if (enable_vision && !config.have_vision_weight) { if (enable_vision && !config.have_vision_weight) {
@ -1822,11 +1823,11 @@ namespace LLM {
LLMEmbedder(LLMArch arch, LLMEmbedder(LLMArch arch,
ggml_backend_t backend, ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
bool enable_vision = false) bool enable_vision = false,
: model(arch, backend, params_backend, tensor_storage_map, prefix, enable_vision) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: model(arch, backend, tensor_storage_map, prefix, enable_vision, weight_manager) {
if (arch == LLMArch::MISTRAL_SMALL_3_2 || arch == LLMArch::MINISTRAL_3_3B) { if (arch == LLMArch::MISTRAL_SMALL_3_2 || arch == LLMArch::MINISTRAL_3_3B) {
tokenizer = std::make_shared<MistralTokenizer>(); tokenizer = std::make_shared<MistralTokenizer>();
} else if (arch == LLMArch::GPT_OSS_20B) { } else if (arch == LLMArch::GPT_OSS_20B) {
@ -1840,13 +1841,6 @@ namespace LLM {
model.get_param_tensors(tensors, prefix); model.get_param_tensors(tensors, prefix);
} }
bool alloc_params_buffer() {
if (!model.alloc_params_buffer()) {
return false;
}
return true;
}
std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text, std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
std::pair<int, int> attn_range, std::pair<int, int> attn_range,
size_t max_length = 0, size_t max_length = 0,
@ -2062,7 +2056,8 @@ namespace LLM {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_COUNT; ggml_type model_data_type = GGML_TYPE_COUNT;
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path, "text_encoders.llm.")) { if (!model_loader.init_from_file_and_convert_name(file_path, "text_encoders.llm.")) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
@ -2080,24 +2075,20 @@ namespace LLM {
LLMArch arch = LLMArch::QWEN3; LLMArch arch = LLMArch::QWEN3;
std::shared_ptr<LLMEmbedder> llm = std::make_shared<LLMEmbedder>(arch, std::shared_ptr<LLMEmbedder> llm = std::make_shared<LLMEmbedder>(arch,
backend,
backend, backend,
tensor_storage_map, tensor_storage_map,
"text_encoders.llm", "text_encoders.llm",
true); true,
model_manager);
if (!llm->alloc_params_buffer()) { if (!model_manager->register_runner_params("LLM test",
LOG_ERROR("llm model allocation failed"); *llm,
return; "text_encoders.llm",
} ModelManager::ResidencyMode::Resident,
backend,
std::map<std::string, ggml_tensor*> tensors; backend) ||
llm->get_param_tensors(tensors, "text_encoders.llm"); !model_manager->validate_registered_tensors()) {
LOG_ERROR("register llm tensors with model manager failed");
bool success = model_loader.load_tensors(tensors);
if (!success) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_TE_T5_HPP__ #ifndef __SD_MODEL_TE_T5_HPP__
#define __SD_MODEL_TE_T5_HPP__ #define __SD_MODEL_TE_T5_HPP__
#include <cfloat> #include <cfloat>
@ -12,6 +12,7 @@
#include "core/ggml_extend.hpp" #include "core/ggml_extend.hpp"
#include "model_loader.h" #include "model_loader.h"
#include "model_manager.h"
#include "tokenizers/t5_unigram_tokenizer.h" #include "tokenizers/t5_unigram_tokenizer.h"
struct T5Config { struct T5Config {
@ -334,11 +335,11 @@ struct T5Runner : public GGMLRunner {
std::vector<int> relative_position_bucket_vec; std::vector<int> relative_position_bucket_vec;
T5Runner(ggml_backend_t backend, T5Runner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
bool is_umt5 = false) bool is_umt5 = false,
: GGMLRunner(backend, params_backend), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager),
config(T5Config::detect_from_weights(tensor_storage_map, prefix, is_umt5)) { config(T5Config::detect_from_weights(tensor_storage_map, prefix, is_umt5)) {
model = T5(config); model = T5(config);
model.init(params_ctx, tensor_storage_map, prefix); model.init(params_ctx, tensor_storage_map, prefix);
@ -477,24 +478,17 @@ struct T5Embedder {
T5Runner model; T5Runner model;
T5Embedder(ggml_backend_t backend, T5Embedder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
bool is_umt5 = false) bool is_umt5 = false,
: model(backend, params_backend, tensor_storage_map, prefix, is_umt5), tokenizer(is_umt5) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: model(backend, tensor_storage_map, prefix, is_umt5, weight_manager), tokenizer(is_umt5) {
} }
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) { void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
model.get_param_tensors(tensors, prefix); model.get_param_tensors(tensors, prefix);
} }
bool alloc_params_buffer() {
if (!model.alloc_params_buffer()) {
return false;
}
return true;
}
std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text, std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
size_t max_length = 0, size_t max_length = 0,
bool padding = false) { bool padding = false) {
@ -579,7 +573,8 @@ struct T5Embedder {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_F16; ggml_type model_data_type = GGML_TYPE_F16;
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(file_path)) { if (!model_loader.init_from_file_and_convert_name(file_path)) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
@ -592,19 +587,16 @@ struct T5Embedder {
} }
} }
std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, backend, tensor_storage_map, "", true); std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, tensor_storage_map, "", true, model_manager);
if (!t5->alloc_params_buffer()) { if (!model_manager->register_runner_params("T5 test",
LOG_ERROR("t5 params buffer allocation failed"); *t5,
return; "",
} ModelManager::ResidencyMode::Resident,
std::map<std::string, ggml_tensor*> tensors; backend,
t5->get_param_tensors(tensors, ""); backend) ||
!model_manager->validate_registered_tensors()) {
bool success = model_loader.load_tensors(tensors); LOG_ERROR("register t5 tensors with model manager failed");
if (!success) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_UPSCALER_ESRGAN_HPP__ #ifndef __SD_MODEL_UPSCALER_ESRGAN_HPP__
#define __SD_MODEL_UPSCALER_ESRGAN_HPP__ #define __SD_MODEL_UPSCALER_ESRGAN_HPP__
#include <algorithm> #include <algorithm>
@ -229,9 +229,9 @@ struct ESRGAN : public GGMLRunner {
std::unique_ptr<RRDBNet> rrdb_net; std::unique_ptr<RRDBNet> rrdb_net;
ESRGAN(ggml_backend_t backend, ESRGAN(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map = {},
const String2TensorStorage& tensor_storage_map = {}) std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, params_backend), : GGMLRunner(backend, weight_manager),
config(ESRGANConfig::detect_from_weights(tensor_storage_map)), config(ESRGANConfig::detect_from_weights(tensor_storage_map)),
rrdb_net(std::make_unique<RRDBNet>(config)) { rrdb_net(std::make_unique<RRDBNet>(config)) {
rrdb_net->init(params_ctx, tensor_storage_map, ""); rrdb_net->init(params_ctx, tensor_storage_map, "");

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__ #ifndef __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
#define __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__ #define __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
#include <algorithm> #include <algorithm>
@ -433,9 +433,9 @@ namespace LTXVUpsampler {
std::unique_ptr<LatentUpsampler> model; std::unique_ptr<LatentUpsampler> model;
LatentUpsamplerRunner(ggml_backend_t backend, LatentUpsamplerRunner(ggml_backend_t backend,
ggml_backend_t params_backend, const String2TensorStorage& tensor_storage_map,
const String2TensorStorage& tensor_storage_map) std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, params_backend), : GGMLRunner(backend, weight_manager),
config(LatentUpsamplerConfig::detect_from_weights(tensor_storage_map)) { config(LatentUpsamplerConfig::detect_from_weights(tensor_storage_map)) {
if (config.dims != 3 || (!config.spatial_upsample && !config.temporal_upsample) || if (config.dims != 3 || (!config.spatial_upsample && !config.temporal_upsample) ||
config.spatial_up_num < 1 || config.spatial_down_den < 1 || config.temporal_up_factor < 1) { config.spatial_up_num < 1 || config.spatial_down_den < 1 || config.temporal_up_factor < 1) {

View File

@ -213,9 +213,9 @@ protected:
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1); params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
} }
float get_alpha() { ggml_tensor* get_alpha(GGMLRunnerContext* ctx) {
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]); auto mix_factor = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["mix_factor"]);
return sigmoid(alpha); return ggml_sigmoid(ctx->ggml_ctx, mix_factor);
} }
public: public:
@ -250,10 +250,12 @@ public:
x = time_stack->forward(ctx, x); // b t c (h w) x = time_stack->forward(ctx, x); // b t c (h w)
float alpha = get_alpha(); auto alpha = get_alpha(ctx);
x = ggml_add(ctx->ggml_ctx, x = ggml_add(ctx->ggml_ctx,
ggml_ext_scale(ctx->ggml_ctx, x, alpha), x_mix,
ggml_ext_scale(ctx->ggml_ctx, x_mix, 1.0f - alpha)); ggml_mul(ctx->ggml_ctx,
ggml_sub(ctx->ggml_ctx, x, x_mix),
alpha));
x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w) x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w)
x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w
@ -664,13 +666,13 @@ struct AutoEncoderKL : public VAE {
AutoEncoderKLModel ae; AutoEncoderKLModel ae;
AutoEncoderKL(ggml_backend_t backend, AutoEncoderKL(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
bool decode_only = false, bool decode_only = false,
bool use_video_decoder = false, bool use_video_decoder = false,
SDVersion version = VERSION_SD1) SDVersion version = VERSION_SD1,
: VAE(version, backend, params_backend, prefix), decode_only(decode_only) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: VAE(version, backend, prefix, weight_manager), decode_only(decode_only) {
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) { if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
scale_factor = 0.18215f; scale_factor = 0.18215f;
shift_factor = 0.f; shift_factor = 0.f;

View File

@ -1,4 +1,4 @@
#ifndef __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__ #ifndef __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
#define __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__ #define __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
#include <cmath> #include <cmath>
@ -9,6 +9,7 @@
#include "core/ggml_extend.hpp" #include "core/ggml_extend.hpp"
#include "model_loader.h" #include "model_loader.h"
#include "model_manager.h"
namespace LTXV { namespace LTXV {
@ -1001,10 +1002,10 @@ namespace LTXV {
sd::Tensor<float> bwe_skip_filter_tensor; sd::Tensor<float> bwe_skip_filter_tensor;
LTXAudioVAERunner(ggml_backend_t backend, LTXAudioVAERunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string& prefix = "") const std::string& prefix = "",
: GGMLRunner(backend, params_backend), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: GGMLRunner(backend, weight_manager),
weight_prefix(prefix), weight_prefix(prefix),
config(LTXAudioVAEConfig::detect_from_weights(tensor_storage_map)), config(LTXAudioVAEConfig::detect_from_weights(tensor_storage_map)),
model(config) { model(config) {
@ -1019,7 +1020,7 @@ namespace LTXV {
model.get_param_tensors(tensors, weight_prefix); model.get_param_tensors(tensors, weight_prefix);
} }
size_t get_params_buffer_size() { size_t get_params_mem_size() {
return model.get_params_mem_size(); return model.get_params_mem_size();
} }
@ -1066,7 +1067,8 @@ namespace LTXV {
// ggml_backend_t backend = ggml_backend_cuda_init(0); // ggml_backend_t backend = ggml_backend_cuda_init(0);
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str()); LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file(model_path)) { if (!model_loader.init_from_file(model_path)) {
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
return; return;
@ -1074,20 +1076,17 @@ namespace LTXV {
auto& tensor_storage_map = model_loader.get_tensor_storage_map(); auto& tensor_storage_map = model_loader.get_tensor_storage_map();
auto ltx_audio_vae = std::make_shared<LTXAudioVAERunner>(backend, auto ltx_audio_vae = std::make_shared<LTXAudioVAERunner>(backend,
backend,
tensor_storage_map, tensor_storage_map,
prefix); prefix,
model_manager);
if (!ltx_audio_vae->alloc_params_buffer()) { if (!model_manager->register_runner_params("LTX audio VAE test",
LOG_ERROR("ltx audio vae buffer allocation failed"); *ltx_audio_vae,
return; ModelManager::ResidencyMode::Resident,
} backend,
backend) ||
std::map<std::string, ggml_tensor*> tensors; !model_manager->validate_registered_tensors()) {
ltx_audio_vae->get_param_tensors(tensors); LOG_ERROR("register ltx audio vae tensors with model manager failed");
if (!model_loader.load_tensors(tensors)) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -957,8 +957,8 @@ namespace LTXVAE {
ggml_tensor* scaled_timestep = timestep; ggml_tensor* scaled_timestep = timestep;
if (timestep_conditioning) { if (timestep_conditioning) {
auto multiplier = ggml_ext_backend_tensor_get_f32(params["timestep_scale_multiplier"]); auto multiplier = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["timestep_scale_multiplier"]);
scaled_timestep = ggml_ext_scale(ctx->ggml_ctx, timestep, multiplier); scaled_timestep = ggml_mul(ctx->ggml_ctx, timestep, multiplier);
} }
x = conv_in->forward(ctx, x, causal_decoder); x = conv_in->forward(ctx, x, causal_decoder);
@ -1008,8 +1008,8 @@ namespace LTXVAE {
ggml_tensor* scaled_timestep = timestep; ggml_tensor* scaled_timestep = timestep;
if (timestep_conditioning && timestep != nullptr) { if (timestep_conditioning && timestep != nullptr) {
auto multiplier = ggml_ext_backend_tensor_get_f32(params["timestep_scale_multiplier"]); auto multiplier = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["timestep_scale_multiplier"]);
scaled_timestep = ggml_ext_scale(ctx->ggml_ctx, timestep, multiplier); scaled_timestep = ggml_mul(ctx->ggml_ctx, timestep, multiplier);
} }
// conv_in with feat_map for left temporal context // conv_in with feat_map for left temporal context
@ -1223,11 +1223,11 @@ struct LTXVideoVAE : public VAE {
LTXVAE::VideoVAE vae; LTXVAE::VideoVAE vae;
LTXVideoVAE(ggml_backend_t backend, LTXVideoVAE(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string& prefix, const std::string& prefix,
bool decode_only = true, bool decode_only = true,
SDVersion version = VERSION_LTXAV) SDVersion version = VERSION_LTXAV,
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: decode_only(decode_only), : decode_only(decode_only),
ltx_vae_version(LTXVAE::detect_ltx_vae_version(tensor_storage_map, prefix)), ltx_vae_version(LTXVAE::detect_ltx_vae_version(tensor_storage_map, prefix)),
timestep_conditioning(LTXVAE::detect_ltx_vae_timestep_conditioning(tensor_storage_map, prefix)), timestep_conditioning(LTXVAE::detect_ltx_vae_timestep_conditioning(tensor_storage_map, prefix)),
@ -1239,7 +1239,7 @@ struct LTXVideoVAE : public VAE {
patch_size, patch_size,
tensor_storage_map, tensor_storage_map,
prefix), prefix),
VAE(version, backend, params_backend, prefix) { VAE(version, backend, prefix, weight_manager) {
vae.init(params_ctx, tensor_storage_map, prefix); vae.init(params_ctx, tensor_storage_map, prefix);
decode_timestep_tensor.values()[0] = vae.decode_timestep; decode_timestep_tensor.values()[0] = vae.decode_timestep;
} }
@ -1521,7 +1521,8 @@ struct LTXVideoVAE : public VAE {
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
LOG_INFO("loading ltx vae from '%s'", model_path.c_str()); LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
ModelLoader model_loader; auto model_manager = std::make_shared<ModelManager>();
ModelLoader& model_loader = model_manager->loader();
if (!model_loader.init_from_file_and_convert_name(model_path, "vae.")) { if (!model_loader.init_from_file_and_convert_name(model_path, "vae.")) {
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
return; return;
@ -1529,22 +1530,19 @@ struct LTXVideoVAE : public VAE {
auto& tensor_storage_map = model_loader.get_tensor_storage_map(); auto& tensor_storage_map = model_loader.get_tensor_storage_map();
std::shared_ptr<LTXVideoVAE> vae = std::make_shared<LTXVideoVAE>(backend, std::shared_ptr<LTXVideoVAE> vae = std::make_shared<LTXVideoVAE>(backend,
backend,
tensor_storage_map, tensor_storage_map,
"first_stage_model", "first_stage_model",
true, true,
VERSION_LTXAV); VERSION_LTXAV,
model_manager);
if (!vae->alloc_params_buffer()) { if (!model_manager->register_runner_params("LTX VAE test",
LOG_ERROR("vae buffer allocation failed"); *vae,
return; ModelManager::ResidencyMode::Resident,
} backend,
backend) ||
std::map<std::string, ggml_tensor*> tensors; !model_manager->validate_registered_tensors()) {
vae->get_param_tensors(tensors); LOG_ERROR("register ltx vae tensors with model manager failed");
if (!model_loader.load_tensors(tensors)) {
LOG_ERROR("load tensors from model loader failed");
return; return;
} }

View File

@ -623,12 +623,12 @@ struct TinyImageAutoEncoder : public VAE {
bool decode_only = false; bool decode_only = false;
TinyImageAutoEncoder(ggml_backend_t backend, TinyImageAutoEncoder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
bool decoder_only = true, bool decoder_only = true,
SDVersion version = VERSION_SD1) SDVersion version = VERSION_SD1,
: VAE(version, backend, params_backend, "tae"), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: VAE(version, backend, "tae", weight_manager),
decode_only(decoder_only), decode_only(decoder_only),
taesd(decoder_only, version) { taesd(decoder_only, version) {
scale_input = false; scale_input = false;
@ -686,12 +686,12 @@ struct TinyVideoAutoEncoder : public VAE {
bool is_wide = false; bool is_wide = false;
TinyVideoAutoEncoder(ggml_backend_t backend, TinyVideoAutoEncoder(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map, const String2TensorStorage& tensor_storage_map,
const std::string prefix, const std::string prefix,
bool decoder_only = true, bool decoder_only = true,
SDVersion version = VERSION_WAN2) SDVersion version = VERSION_WAN2,
: VAE(version, backend, params_backend, "tae"), std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: VAE(version, backend, "tae", weight_manager),
decode_only(decoder_only) { decode_only(decoder_only) {
for (auto tensor_storage : tensor_storage_map) { for (auto tensor_storage : tensor_storage_map) {
if (tensor_storage.first.find(prefix + ".3.conv.6.weight") != std::string::npos) { if (tensor_storage.first.find(prefix + ".3.conv.6.weight") != std::string::npos) {

View File

@ -1,8 +1,9 @@
#ifndef __SD_MODEL_VAE_VAE_HPP__ #ifndef __SD_MODEL_VAE_VAE_HPP__
#define __SD_MODEL_VAE_VAE_HPP__ #define __SD_MODEL_VAE_VAE_HPP__
#include "core/tensor_ggml.hpp" #include "core/tensor_ggml.hpp"
#include "model/common/block.hpp" #include "model/common/block.hpp"
#include "model_manager.h"
struct VAE : public GGMLRunner { struct VAE : public GGMLRunner {
protected: protected:
@ -63,8 +64,11 @@ protected:
} }
public: public:
VAE(SDVersion version, ggml_backend_t backend, ggml_backend_t params_backend, const std::string& weight_prefix = "") VAE(SDVersion version,
: version(version), weight_prefix(weight_prefix), GGMLRunner(backend, params_backend) {} ggml_backend_t backend,
const std::string& weight_prefix = "",
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: version(version), weight_prefix(weight_prefix), GGMLRunner(backend, weight_manager) {}
int get_scale_factor() { int get_scale_factor() {
int scale_factor = 8; int scale_factor = 8;
@ -224,8 +228,10 @@ public:
}; };
struct FakeVAE : public VAE { struct FakeVAE : public VAE {
FakeVAE(SDVersion version, ggml_backend_t backend, ggml_backend_t params_backend) FakeVAE(SDVersion version,
: VAE(version, backend, params_backend) {} ggml_backend_t backend,
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: VAE(version, backend, "", weight_manager) {}
int get_encoder_output_channels(int input_channels) { int get_encoder_output_channels(int input_channels) {
return input_channels; return input_channels;

View File

@ -1124,12 +1124,12 @@ namespace WAN {
WanVAE ae; WanVAE ae;
WanVAERunner(ggml_backend_t backend, WanVAERunner(ggml_backend_t backend,
ggml_backend_t params_backend,
const String2TensorStorage& tensor_storage_map = {}, const String2TensorStorage& tensor_storage_map = {},
const std::string prefix = "", const std::string prefix = "",
bool decode_only = false, bool decode_only = false,
SDVersion version = VERSION_WAN2) SDVersion version = VERSION_WAN2,
: VAE(version, backend, params_backend, prefix), decode_only(decode_only), ae(decode_only, version == VERSION_WAN2_2_TI2V) { std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
: VAE(version, backend, prefix, weight_manager), decode_only(decode_only), ae(decode_only, version == VERSION_WAN2_2_TI2V) {
ae.init(params_ctx, tensor_storage_map, prefix); ae.init(params_ctx, tensor_storage_map, prefix);
} }
@ -1327,27 +1327,24 @@ namespace WAN {
// ggml_backend_t backend = ggml_backend_cuda_init(0); // ggml_backend_t backend = ggml_backend_cuda_init(0);
ggml_backend_t backend = sd_backend_cpu_init(); ggml_backend_t backend = sd_backend_cpu_init();
ggml_type model_data_type = GGML_TYPE_F16; ggml_type model_data_type = GGML_TYPE_F16;
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, backend, String2TensorStorage{}, "first_stage_model", false, VERSION_WAN2_2_TI2V); auto model_manager = std::make_shared<ModelManager>();
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, String2TensorStorage{}, "first_stage_model", false, VERSION_WAN2_2_TI2V, model_manager);
{ {
LOG_INFO("loading from '%s'", file_path.c_str()); LOG_INFO("loading from '%s'", file_path.c_str());
if (!vae->alloc_params_buffer()) { ModelLoader& model_loader = model_manager->loader();
LOG_ERROR("vae buffer allocation failed");
return;
}
std::map<std::string, ggml_tensor*> tensors;
vae->get_param_tensors(tensors);
ModelLoader model_loader;
if (!model_loader.init_from_file_and_convert_name(file_path, "vae.")) { if (!model_loader.init_from_file_and_convert_name(file_path, "vae.")) {
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str()); LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
return; return;
} }
bool success = model_loader.load_tensors(tensors); if (!model_manager->register_runner_params("Wan VAE test",
*vae,
if (!success) { ModelManager::ResidencyMode::Resident,
LOG_ERROR("load tensors from model loader failed"); backend,
backend) ||
!model_manager->validate_registered_tensors()) {
LOG_ERROR("register wan vae tensors with model manager failed");
return; return;
} }

View File

@ -121,6 +121,42 @@ public:
ggml_backend_t compute_backend, ggml_backend_t compute_backend,
ggml_backend_t params_backend, ggml_backend_t params_backend,
size_t* registered_tensor_size = nullptr); size_t* registered_tensor_size = nullptr);
// Convenience wrapper around register_param_tensors() for runners whose
// get_param_tensors() is called with the tensor map as its only argument.
//
// Runner must provide a get_param_tensors member callable with a
// std::map<std::string, ggml_tensor*> lvalue; that call fills the map.
//
// desc, residency_mode, compute_backend, params_backend and
// registered_tensor_size are forwarded to register_param_tensors() untouched.
// NOTE(review): registered_tensor_size looks like an optional out-parameter
// (defaults to nullptr) -- confirm its meaning against register_param_tensors().
//
// Returns whatever register_param_tensors() reports.
template <typename Runner>
bool register_runner_params(const std::string& desc,
                            Runner& runner,
                            ResidencyMode residency_mode,
                            ggml_backend_t compute_backend,
                            ggml_backend_t params_backend,
                            size_t* registered_tensor_size = nullptr) {
    std::map<std::string, ggml_tensor*> runner_params;
    runner.get_param_tensors(runner_params);

    // The map is a throwaway local, so hand it over by move.
    const bool registered = register_param_tensors(desc,
                                                   std::move(runner_params),
                                                   residency_mode,
                                                   compute_backend,
                                                   params_backend,
                                                   registered_tensor_size);
    return registered;
}
// Convenience wrapper around register_param_tensors() for runners whose
// get_param_tensors() takes the tensor map plus a name prefix.
//
// Runner must provide a get_param_tensors member callable with a
// std::map<std::string, ggml_tensor*> lvalue and `prefix`; that call fills
// the map.
//
// desc, residency_mode, compute_backend, params_backend and
// registered_tensor_size are forwarded to register_param_tensors() untouched.
// NOTE(review): registered_tensor_size looks like an optional out-parameter
// (defaults to nullptr) -- confirm its meaning against register_param_tensors().
//
// Returns whatever register_param_tensors() reports.
template <typename Runner>
bool register_runner_params(const std::string& desc,
                            Runner& runner,
                            const std::string& prefix,
                            ResidencyMode residency_mode,
                            ggml_backend_t compute_backend,
                            ggml_backend_t params_backend,
                            size_t* registered_tensor_size = nullptr) {
    std::map<std::string, ggml_tensor*> prefixed_params;
    runner.get_param_tensors(prefixed_params, prefix);

    // The map is a throwaway local, so hand it over by move.
    const bool registered = register_param_tensors(desc,
                                                   std::move(prefixed_params),
                                                   residency_mode,
                                                   compute_backend,
                                                   params_backend,
                                                   registered_tensor_size);
    return registered;
}
bool validate_registered_tensors(); bool validate_registered_tensors();
bool prepare_params(const std::vector<ggml_tensor*>& tensors) override; bool prepare_params(const std::vector<ggml_tensor*>& tensors) override;

View File

@ -241,7 +241,6 @@ public:
} }
std::map<std::string, ggml_tensor*> group_tensors; std::map<std::string, ggml_tensor*> group_tensors;
model->get_param_tensors(group_tensors); model->get_param_tensors(group_tensors);
model->set_weight_manager(model_manager);
if (model_manager == nullptr) { if (model_manager == nullptr) {
return true; return true;
} }
@ -586,33 +585,35 @@ public:
if (sd_version_is_sd3(version)) { if (sd_version_is_sd3(version)) {
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<SD3CLIPEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map);
diffusion_model = std::make_shared<MMDiTRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model"); model_manager);
diffusion_model = std::make_shared<MMDiTRunner>(backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model.diffusion_model",
model_manager);
} else if (sd_version_is_pid(version)) { } else if (sd_version_is_pid(version)) {
vae_decode_only = false; vae_decode_only = false;
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map,
version);
diffusion_model = std::make_shared<Pid::PiDRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model.diffusion_model.net");
} else if (sd_version_is_ideogram4(version)) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version, version,
"", "",
false); false,
diffusion_model = std::make_shared<Ideogram4::Ideogram4Runner>(backend_for(SDBackendModule::DIFFUSION), model_manager);
params_backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Pid::PiDRunner>(backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model"); "model.diffusion_model.net",
model_manager);
} else if (sd_version_is_ideogram4(version)) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
tensor_storage_map,
version,
"",
false,
model_manager);
diffusion_model = std::make_shared<Ideogram4::Ideogram4Runner>(backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model.diffusion_model",
model_manager);
} else if (sd_version_is_flux(version)) { } else if (sd_version_is_flux(version)) {
bool is_chroma = false; bool is_chroma = false;
for (auto pair : tensor_storage_map) { for (auto pair : tensor_storage_map) {
@ -623,66 +624,71 @@ public:
} }
if (is_chroma) { if (is_chroma) {
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
sd_ctx_params->chroma_use_t5_mask, sd_ctx_params->chroma_use_t5_mask,
sd_ctx_params->chroma_t5_mask_pad); sd_ctx_params->chroma_t5_mask_pad,
false,
model_manager);
} else if (version == VERSION_OVIS_IMAGE) { } else if (version == VERSION_OVIS_IMAGE) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version, version,
"", "",
false); false,
model_manager);
} else { } else {
cond_stage_model = std::make_shared<FluxCLIPEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<FluxCLIPEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE), tensor_storage_map,
tensor_storage_map); model_manager);
} }
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version, version,
sd_ctx_params->chroma_use_dit_mask); sd_ctx_params->chroma_use_dit_mask,
model_manager);
} else if (sd_version_is_flux2(version)) { } else if (sd_version_is_flux2(version)) {
bool is_chroma = false; bool is_chroma = false;
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version); version,
"",
false,
model_manager);
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version, version,
sd_ctx_params->chroma_use_dit_mask); sd_ctx_params->chroma_use_dit_mask,
model_manager);
} else if (sd_version_is_ltxav(version)) { } else if (sd_version_is_ltxav(version)) {
cond_stage_model = std::make_shared<LTXAVEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LTXAVEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map);
diffusion_model = std::make_shared<LTXV::LTXAVRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model"); "text_encoders.llm",
"text_embedding_projection",
model_manager);
diffusion_model = std::make_shared<LTXV::LTXAVRunner>(backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model.diffusion_model",
model_manager);
} else if (sd_version_is_wan(version)) { } else if (sd_version_is_wan(version)) {
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
true, true,
0, 0,
true); true,
model_manager);
diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version); version,
model_manager);
if (strlen(SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path)) > 0) { if (strlen(SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path)) > 0) {
high_noise_diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION), high_noise_diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.high_noise_diffusion_model", "model.high_noise_diffusion_model",
version); version,
model_manager);
} }
if (diffusion_model->get_desc() == "Wan2.1-I2V-14B" || if (diffusion_model->get_desc() == "Wan2.1-I2V-14B" ||
diffusion_model->get_desc() == "Wan2.1-FLF2V-14B" || diffusion_model->get_desc() == "Wan2.1-FLF2V-14B" ||
@ -691,8 +697,8 @@ public:
return false; return false;
} }
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend_for(SDBackendModule::CLIP_VISION), clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend_for(SDBackendModule::CLIP_VISION),
params_backend_for(SDBackendModule::CLIP_VISION), tensor_storage_map,
tensor_storage_map); model_manager);
clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes); clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes);
if (!register_runner_params("CLIP vision", if (!register_runner_params("CLIP vision",
clip_vision, clip_vision,
@ -706,93 +712,99 @@ public:
enable_vision = true; enable_vision = true;
} }
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version, version,
"", "",
enable_vision); enable_vision,
model_manager);
diffusion_model = std::make_shared<Qwen::QwenImageRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Qwen::QwenImageRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version, version,
sd_ctx_params->qwen_image_zero_cond_t); sd_ctx_params->qwen_image_zero_cond_t,
model_manager);
} else if (sd_version_is_longcat(version)) { } else if (sd_version_is_longcat(version)) {
bool enable_vision = false; bool enable_vision = false;
if (!vae_decode_only) { if (!vae_decode_only) {
enable_vision = true; enable_vision = true;
} }
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version, version,
"", "",
enable_vision); enable_vision,
model_manager);
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version, version,
sd_ctx_params->chroma_use_dit_mask); sd_ctx_params->chroma_use_dit_mask,
model_manager);
} else if (version == VERSION_HIDREAM_O1) { } else if (version == VERSION_HIDREAM_O1) {
cond_stage_model = std::make_shared<HiDreamO1::HiDreamO1Conditioner>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<HiDreamO1::HiDreamO1Conditioner>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map);
diffusion_model = std::make_shared<HiDreamO1::HiDreamO1Runner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model"); model_manager);
diffusion_model = std::make_shared<HiDreamO1::HiDreamO1Runner>(backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model",
model_manager);
} else if (sd_version_is_anima(version)) { } else if (sd_version_is_anima(version)) {
cond_stage_model = std::make_shared<AnimaConditioner>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<AnimaConditioner>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE), tensor_storage_map,
tensor_storage_map); model_manager);
diffusion_model = std::make_shared<Anima::AnimaRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Anima::AnimaRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model.diffusion_model");
} else if (sd_version_is_z_image(version)) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map,
version);
diffusion_model = std::make_shared<ZImage::ZImageRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version); model_manager);
} else if (sd_version_is_z_image(version)) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
tensor_storage_map,
version,
"",
false,
model_manager);
diffusion_model = std::make_shared<ZImage::ZImageRunner>(backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map,
"model.diffusion_model",
version,
model_manager);
} else if (sd_version_is_ernie_image(version)) { } else if (sd_version_is_ernie_image(version)) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version); version,
"",
false,
model_manager);
diffusion_model = std::make_shared<ErnieImage::ErnieImageRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<ErnieImage::ErnieImageRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model"); "model.diffusion_model",
model_manager);
} else if (sd_version_is_lens(version)) { } else if (sd_version_is_lens(version)) {
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
version); version,
"",
false,
model_manager);
diffusion_model = std::make_shared<Lens::LensRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<Lens::LensRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model"); "model.diffusion_model",
model_manager);
} else { // SD1.x SD2.x SDXL } else { // SD1.x SD2.x SDXL
std::map<std::string, std::string> embbeding_map; std::map<std::string, std::string> embbeding_map;
for (uint32_t i = 0; i < sd_ctx_params->embedding_count; i++) { for (uint32_t i = 0; i < sd_ctx_params->embedding_count; i++) {
embbeding_map.emplace(SAFE_STR(sd_ctx_params->embeddings[i].name), SAFE_STR(sd_ctx_params->embeddings[i].path)); embbeding_map.emplace(SAFE_STR(sd_ctx_params->embeddings[i].name), SAFE_STR(sd_ctx_params->embeddings[i].path));
} }
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend_for(SDBackendModule::TE), cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend_for(SDBackendModule::TE),
params_backend_for(SDBackendModule::TE),
tensor_storage_map, tensor_storage_map,
embbeding_map, embbeding_map,
version); version,
model_manager);
diffusion_model = std::make_shared<UNetModelRunner>(backend_for(SDBackendModule::DIFFUSION), diffusion_model = std::make_shared<UNetModelRunner>(backend_for(SDBackendModule::DIFFUSION),
params_backend_for(SDBackendModule::DIFFUSION),
tensor_storage_map, tensor_storage_map,
"model.diffusion_model", "model.diffusion_model",
version); version,
model_manager);
if (sd_ctx_params->diffusion_conv_direct) { if (sd_ctx_params->diffusion_conv_direct) {
LOG_INFO("Using Conv2d direct in the diffusion model"); LOG_INFO("Using Conv2d direct in the diffusion model");
diffusion_model->set_conv2d_direct_enabled(true); diffusion_model->set_conv2d_direct_enabled(true);
@ -841,19 +853,19 @@ public:
sd_version_is_anima(version) || sd_version_is_anima(version) ||
sd_version_is_ltxav(version)) { sd_version_is_ltxav(version)) {
return std::make_shared<TinyVideoAutoEncoder>(backend_for(SDBackendModule::VAE), return std::make_shared<TinyVideoAutoEncoder>(backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE),
tensor_storage_map, tensor_storage_map,
"decoder", "decoder",
vae_decode_only, vae_decode_only,
version); version,
model_manager);
} else { } else {
auto model = std::make_shared<TinyImageAutoEncoder>(backend_for(SDBackendModule::VAE), auto model = std::make_shared<TinyImageAutoEncoder>(backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE),
tensor_storage_map, tensor_storage_map,
"decoder.layers", "decoder.layers",
vae_decode_only, vae_decode_only,
version); version,
model_manager);
return model; return model;
} }
}; };
@ -871,28 +883,28 @@ public:
auto create_vae = [&]() -> std::shared_ptr<VAE> { auto create_vae = [&]() -> std::shared_ptr<VAE> {
if (sd_version_is_ltxav(version)) { if (sd_version_is_ltxav(version)) {
return std::make_shared<LTXVideoVAE>(backend_for(SDBackendModule::VAE), return std::make_shared<LTXVideoVAE>(backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE),
tensor_storage_map, tensor_storage_map,
"first_stage_model", "first_stage_model",
vae_decode_only, vae_decode_only,
version); version,
model_manager);
} else if (sd_version_is_wan(version) || } else if (sd_version_is_wan(version) ||
sd_version_is_qwen_image(version) || sd_version_is_qwen_image(version) ||
sd_version_is_anima(version)) { sd_version_is_anima(version)) {
return std::make_shared<WAN::WanVAERunner>(backend_for(SDBackendModule::VAE), return std::make_shared<WAN::WanVAERunner>(backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE),
tensor_storage_map, tensor_storage_map,
"first_stage_model", "first_stage_model",
vae_decode_only, vae_decode_only,
version); version,
model_manager);
} else { } else {
auto model = std::make_shared<AutoEncoderKL>(backend_for(SDBackendModule::VAE), auto model = std::make_shared<AutoEncoderKL>(backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE),
tensor_storage_map, tensor_storage_map,
"first_stage_model", "first_stage_model",
vae_decode_only, vae_decode_only,
false, false,
vae_version); vae_version,
model_manager);
if (sd_version_is_sdxl(version) && if (sd_version_is_sdxl(version) &&
(strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale || external_vae_is_invalid)) { (strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale || external_vae_is_invalid)) {
float vae_conv_2d_scale = 1.f / 32.f; float vae_conv_2d_scale = 1.f / 32.f;
@ -910,7 +922,7 @@ public:
LOG_INFO("using FakeVAE"); LOG_INFO("using FakeVAE");
first_stage_model = std::make_shared<FakeVAE>(version, first_stage_model = std::make_shared<FakeVAE>(version,
backend_for(SDBackendModule::VAE), backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE)); model_manager);
if (!register_runner_params("VAE", if (!register_runner_params("VAE",
first_stage_model, first_stage_model,
SDBackendModule::VAE, SDBackendModule::VAE,
@ -952,8 +964,9 @@ public:
if (use_audio_vae) { if (use_audio_vae) {
audio_vae_model = std::make_shared<LTXV::LTXAudioVAERunner>(backend_for(SDBackendModule::VAE), audio_vae_model = std::make_shared<LTXV::LTXAudioVAERunner>(backend_for(SDBackendModule::VAE),
params_backend_for(SDBackendModule::VAE), tensor_storage_map,
tensor_storage_map); "",
model_manager);
if (!register_runner_params("LTX audio VAE", if (!register_runner_params("LTX audio VAE",
audio_vae_model, audio_vae_model,
SDBackendModule::VAE, SDBackendModule::VAE,
@ -977,7 +990,9 @@ public:
control_net = std::make_shared<ControlNet>(backend_for(SDBackendModule::CONTROL_NET), control_net = std::make_shared<ControlNet>(backend_for(SDBackendModule::CONTROL_NET),
params_backend_for(SDBackendModule::CONTROL_NET), params_backend_for(SDBackendModule::CONTROL_NET),
model_loader.get_tensor_storage_map(), model_loader.get_tensor_storage_map(),
version); version,
"",
model_manager);
if (sd_ctx_params->diffusion_conv_direct) { if (sd_ctx_params->diffusion_conv_direct) {
LOG_INFO("Using Conv2d direct in the control net"); LOG_INFO("Using Conv2d direct in the control net");
control_net->set_conv2d_direct_enabled(true); control_net->set_conv2d_direct_enabled(true);
@ -998,6 +1013,7 @@ public:
version, version,
tensor_storage_map, tensor_storage_map,
model_loader, model_loader,
model_manager,
n_threads, n_threads,
[this](SDBackendModule module) { return ensure_backend_pair(module); }, [this](SDBackendModule module) { return ensure_backend_pair(module); },
[this](SDBackendModule module) { return backend_for(module); }, [this](SDBackendModule module) { return backend_for(module); },
@ -5008,8 +5024,8 @@ static sd::Tensor<float> upscale_ltx_spatial_video_latent(sd_ctx_t* sd_ctx,
std::unique_ptr<LTXVUpsampler::LatentUpsamplerRunner> upsampler = std::unique_ptr<LTXVUpsampler::LatentUpsamplerRunner> upsampler =
std::make_unique<LTXVUpsampler::LatentUpsamplerRunner>(sd_ctx->sd->backend_for(SDBackendModule::UPSCALER), std::make_unique<LTXVUpsampler::LatentUpsamplerRunner>(sd_ctx->sd->backend_for(SDBackendModule::UPSCALER),
sd_ctx->sd->params_backend_for(SDBackendModule::UPSCALER), model_loader.get_tensor_storage_map(),
model_loader.get_tensor_storage_map()); upsampler_manager);
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram); const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
upsampler->set_max_graph_vram_bytes(max_graph_vram_bytes); upsampler->set_max_graph_vram_bytes(max_graph_vram_bytes);
if (upsampler->model == nullptr) { if (upsampler->model == nullptr) {
@ -5019,7 +5035,6 @@ static sd::Tensor<float> upscale_ltx_spatial_video_latent(sd_ctx_t* sd_ctx,
std::map<std::string, ggml_tensor*> tensors; std::map<std::string, ggml_tensor*> tensors;
upsampler->get_param_tensors(tensors); upsampler->get_param_tensors(tensors);
upsampler->set_weight_manager(upsampler_manager);
if (!upsampler_manager->register_param_tensors("LTX latent upsampler", if (!upsampler_manager->register_param_tensors("LTX latent upsampler",
std::move(tensors), std::move(tensors),
ModelManager::ResidencyMode::Resident, ModelManager::ResidencyMode::Resident,

View File

@ -90,8 +90,8 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
model_loader.set_wtype_override(model_data_type); model_loader.set_wtype_override(model_data_type);
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type)); LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
esrgan_upscaler = std::make_shared<ESRGAN>(backend_for(SDBackendModule::UPSCALER), esrgan_upscaler = std::make_shared<ESRGAN>(backend_for(SDBackendModule::UPSCALER),
params_backend_for(SDBackendModule::UPSCALER), model_loader.get_tensor_storage_map(),
model_loader.get_tensor_storage_map()); model_manager);
if (esrgan_upscaler == nullptr || esrgan_upscaler->rrdb_net == nullptr) { if (esrgan_upscaler == nullptr || esrgan_upscaler->rrdb_net == nullptr) {
LOG_ERROR("init esrgan model from metadata failed: '%s'", esrgan_path.c_str()); LOG_ERROR("init esrgan model from metadata failed: '%s'", esrgan_path.c_str());
return false; return false;
@ -104,7 +104,6 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
std::map<std::string, ggml_tensor*> tensors; std::map<std::string, ggml_tensor*> tensors;
esrgan_upscaler->get_param_tensors(tensors); esrgan_upscaler->get_param_tensors(tensors);
esrgan_upscaler->set_weight_manager(model_manager);
if (!model_manager->register_param_tensors("ESRGAN", if (!model_manager->register_param_tensors("ESRGAN",
std::move(tensors), std::move(tensors),
ModelManager::ResidencyMode::Resident, ModelManager::ResidencyMode::Resident,