mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-17 03:37:20 +00:00
refactor: route all runner params through model manager (#1649)
This commit is contained in:
parent
9b0fceb41b
commit
8d4c7af95b
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_CONDITIONING_CONDITIONER_HPP__
|
#ifndef __SD_CONDITIONING_CONDITIONER_HPP__
|
||||||
#define __SD_CONDITIONING_CONDITIONER_HPP__
|
#define __SD_CONDITIONING_CONDITIONER_HPP__
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
@ -118,7 +118,6 @@ public:
|
|||||||
virtual void set_stream_layers_enabled(bool enabled) {}
|
virtual void set_stream_layers_enabled(bool enabled) {}
|
||||||
virtual void set_flash_attention_enabled(bool enabled) = 0;
|
virtual void set_flash_attention_enabled(bool enabled) = 0;
|
||||||
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
|
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
|
||||||
virtual void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) {}
|
|
||||||
virtual void runner_done() {}
|
virtual void runner_done() {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -137,10 +136,10 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||||||
std::map<std::string, std::pair<int, int>> embedding_pos_map;
|
std::map<std::string, std::pair<int, int>> embedding_pos_map;
|
||||||
|
|
||||||
FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
|
FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::map<std::string, std::string>& orig_embedding_map,
|
const std::map<std::string, std::string>& orig_embedding_map,
|
||||||
SDVersion version = VERSION_SD1)
|
SDVersion version = VERSION_SD1,
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: version(version), tokenizer(sd_version_is_sd2(version) ? 0 : 49407) {
|
: version(version), tokenizer(sd_version_is_sd2(version) ? 0 : 49407) {
|
||||||
for (const auto& kv : orig_embedding_map) {
|
for (const auto& kv : orig_embedding_map) {
|
||||||
std::string name = kv.first;
|
std::string name = kv.first;
|
||||||
@ -150,12 +149,12 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||||||
}
|
}
|
||||||
bool force_clip_f32 = !embedding_map.empty();
|
bool force_clip_f32 = !embedding_map.empty();
|
||||||
if (sd_version_is_sd1(version)) {
|
if (sd_version_is_sd1(version)) {
|
||||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32);
|
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32, weight_manager);
|
||||||
} else if (sd_version_is_sd2(version)) {
|
} else if (sd_version_is_sd2(version)) {
|
||||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32);
|
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32, weight_manager);
|
||||||
} else if (sd_version_is_sdxl(version)) {
|
} else if (sd_version_is_sdxl(version)) {
|
||||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32);
|
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32, weight_manager);
|
||||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32);
|
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32, weight_manager);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,13 +193,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
text_model->set_weight_manager(manager);
|
|
||||||
if (sd_version_is_sdxl(version)) {
|
|
||||||
text_model2->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
text_model->runner_done();
|
text_model->runner_done();
|
||||||
if (sd_version_is_sdxl(version)) {
|
if (sd_version_is_sdxl(version)) {
|
||||||
@ -522,9 +514,9 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
|
|||||||
std::string weight_prefix = "cond_stage_model.transformer";
|
std::string weight_prefix = "cond_stage_model.transformer";
|
||||||
|
|
||||||
FrozenCLIPVisionEmbedder(ggml_backend_t backend,
|
FrozenCLIPVisionEmbedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const String2TensorStorage& tensor_storage_map = {})
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: GGMLRunner(backend, params_backend) {
|
: GGMLRunner(backend, weight_manager) {
|
||||||
bool proj_in = false;
|
bool proj_in = false;
|
||||||
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
if (!starts_with(name, weight_prefix)) {
|
if (!starts_with(name, weight_prefix)) {
|
||||||
@ -580,8 +572,8 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||||||
std::shared_ptr<T5Runner> t5;
|
std::shared_ptr<T5Runner> t5;
|
||||||
|
|
||||||
SD3CLIPEmbedder(ggml_backend_t backend,
|
SD3CLIPEmbedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const String2TensorStorage& tensor_storage_map = {})
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: clip_g_tokenizer(0) {
|
: clip_g_tokenizer(0) {
|
||||||
bool use_clip_l = false;
|
bool use_clip_l = false;
|
||||||
bool use_clip_g = false;
|
bool use_clip_g = false;
|
||||||
@ -600,13 +592,13 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (use_clip_l) {
|
if (use_clip_l) {
|
||||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, false, weight_manager);
|
||||||
}
|
}
|
||||||
if (use_clip_g) {
|
if (use_clip_g) {
|
||||||
clip_g = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, false, weight_manager);
|
||||||
}
|
}
|
||||||
if (use_t5) {
|
if (use_t5) {
|
||||||
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer");
|
t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", false, weight_manager);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -670,18 +662,6 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
if (clip_l) {
|
|
||||||
clip_l->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
if (clip_g) {
|
|
||||||
clip_g->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
if (t5) {
|
|
||||||
t5->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
if (clip_l) {
|
if (clip_l) {
|
||||||
clip_l->runner_done();
|
clip_l->runner_done();
|
||||||
@ -961,8 +941,8 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||||||
size_t chunk_len = 256;
|
size_t chunk_len = 256;
|
||||||
|
|
||||||
FluxCLIPEmbedder(ggml_backend_t backend,
|
FluxCLIPEmbedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const String2TensorStorage& tensor_storage_map = {}) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
|
||||||
bool use_clip_l = false;
|
bool use_clip_l = false;
|
||||||
bool use_t5 = false;
|
bool use_t5 = false;
|
||||||
for (auto pair : tensor_storage_map) {
|
for (auto pair : tensor_storage_map) {
|
||||||
@ -979,12 +959,12 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (use_clip_l) {
|
if (use_clip_l) {
|
||||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
|
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, false, weight_manager);
|
||||||
} else {
|
} else {
|
||||||
LOG_WARN("clip_l text encoder not found! Prompt adherence might be degraded.");
|
LOG_WARN("clip_l text encoder not found! Prompt adherence might be degraded.");
|
||||||
}
|
}
|
||||||
if (use_t5) {
|
if (use_t5) {
|
||||||
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer");
|
t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", false, weight_manager);
|
||||||
} else {
|
} else {
|
||||||
LOG_WARN("t5xxl text encoder not found! Prompt adherence might be degraded.");
|
LOG_WARN("t5xxl text encoder not found! Prompt adherence might be degraded.");
|
||||||
}
|
}
|
||||||
@ -1035,15 +1015,6 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
if (clip_l) {
|
|
||||||
clip_l->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
if (t5) {
|
|
||||||
t5->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
if (clip_l) {
|
if (clip_l) {
|
||||||
clip_l->runner_done();
|
clip_l->runner_done();
|
||||||
@ -1219,11 +1190,11 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||||||
bool is_umt5 = false;
|
bool is_umt5 = false;
|
||||||
|
|
||||||
T5CLIPEmbedder(ggml_backend_t backend,
|
T5CLIPEmbedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
bool use_mask = false,
|
bool use_mask = false,
|
||||||
int mask_pad = 0,
|
int mask_pad = 0,
|
||||||
bool is_umt5 = false)
|
bool is_umt5 = false,
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
|
: use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
|
||||||
bool use_t5 = false;
|
bool use_t5 = false;
|
||||||
for (auto pair : tensor_storage_map) {
|
for (auto pair : tensor_storage_map) {
|
||||||
@ -1236,7 +1207,7 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||||||
LOG_WARN("IMPORTANT NOTICE: No text encoders provided, cannot process prompts!");
|
LOG_WARN("IMPORTANT NOTICE: No text encoders provided, cannot process prompts!");
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer", is_umt5);
|
t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", is_umt5, weight_manager);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1270,12 +1241,6 @@ struct T5CLIPEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
if (t5) {
|
|
||||||
t5->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
if (t5) {
|
if (t5) {
|
||||||
t5->runner_done();
|
t5->runner_done();
|
||||||
@ -1422,15 +1387,15 @@ struct AnimaConditioner : public Conditioner {
|
|||||||
std::shared_ptr<LLM::LLMRunner> llm;
|
std::shared_ptr<LLM::LLMRunner> llm;
|
||||||
|
|
||||||
AnimaConditioner(ggml_backend_t backend,
|
AnimaConditioner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const String2TensorStorage& tensor_storage_map = {}) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
|
||||||
qwen_tokenizer = std::make_shared<Qwen2Tokenizer>();
|
qwen_tokenizer = std::make_shared<Qwen2Tokenizer>();
|
||||||
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::QWEN3,
|
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::QWEN3,
|
||||||
backend,
|
backend,
|
||||||
params_backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"text_encoders.llm",
|
"text_encoders.llm",
|
||||||
false);
|
false,
|
||||||
|
weight_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
@ -1453,10 +1418,6 @@ struct AnimaConditioner : public Conditioner {
|
|||||||
llm->set_weight_adapter(adapter);
|
llm->set_weight_adapter(adapter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
llm->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
llm->runner_done();
|
llm->runner_done();
|
||||||
}
|
}
|
||||||
@ -1545,11 +1506,11 @@ struct LLMEmbedder : public Conditioner {
|
|||||||
std::shared_ptr<LLM::LLMRunner> llm;
|
std::shared_ptr<LLM::LLMRunner> llm;
|
||||||
|
|
||||||
LLMEmbedder(ggml_backend_t backend,
|
LLMEmbedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
SDVersion version = VERSION_QWEN_IMAGE,
|
SDVersion version = VERSION_QWEN_IMAGE,
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
bool enable_vision = false)
|
bool enable_vision = false,
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: version(version) {
|
: version(version) {
|
||||||
LLM::LLMArch arch = LLM::LLMArch::QWEN2_5_VL;
|
LLM::LLMArch arch = LLM::LLMArch::QWEN2_5_VL;
|
||||||
if (version == VERSION_FLUX2) {
|
if (version == VERSION_FLUX2) {
|
||||||
@ -1576,10 +1537,10 @@ struct LLMEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
llm = std::make_shared<LLM::LLMRunner>(arch,
|
llm = std::make_shared<LLM::LLMRunner>(arch,
|
||||||
backend,
|
backend,
|
||||||
params_backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"text_encoders.llm",
|
"text_encoders.llm",
|
||||||
enable_vision);
|
enable_vision,
|
||||||
|
weight_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
@ -1604,12 +1565,6 @@ struct LLMEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
if (llm) {
|
|
||||||
llm->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
if (llm) {
|
if (llm) {
|
||||||
llm->runner_done();
|
llm->runner_done();
|
||||||
@ -2106,10 +2061,10 @@ struct LTXAVTextProjectionRunner : public GGMLRunner {
|
|||||||
LTXAVTextProjection model;
|
LTXAVTextProjection model;
|
||||||
|
|
||||||
LTXAVTextProjectionRunner(ggml_backend_t backend,
|
LTXAVTextProjectionRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string& prefix = "")
|
const std::string& prefix = "",
|
||||||
: GGMLRunner(backend, params_backend),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager),
|
||||||
model(tensor_storage_map.find(prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end()) {
|
model(tensor_storage_map.find(prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end()) {
|
||||||
model.init(params_ctx, tensor_storage_map, prefix);
|
model.init(params_ctx, tensor_storage_map, prefix);
|
||||||
}
|
}
|
||||||
@ -2154,22 +2109,22 @@ struct LTXAVEmbedder : public Conditioner {
|
|||||||
bool dual_projection = false;
|
bool dual_projection = false;
|
||||||
|
|
||||||
LTXAVEmbedder(ggml_backend_t backend,
|
LTXAVEmbedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string& llm_prefix = "text_encoders.llm",
|
const std::string& llm_prefix = "text_encoders.llm",
|
||||||
const std::string& projector_prefix = "text_embedding_projection") {
|
const std::string& projector_prefix = "text_embedding_projection",
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
|
||||||
tokenizer = std::make_shared<GemmaTokenizer>();
|
tokenizer = std::make_shared<GemmaTokenizer>();
|
||||||
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::GEMMA3_12B,
|
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::GEMMA3_12B,
|
||||||
backend,
|
backend,
|
||||||
params_backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
llm_prefix,
|
llm_prefix,
|
||||||
false);
|
false,
|
||||||
|
weight_manager);
|
||||||
dual_projection = tensor_storage_map.find(projector_prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end();
|
dual_projection = tensor_storage_map.find(projector_prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end();
|
||||||
projector = std::make_shared<LTXAVTextProjectionRunner>(backend,
|
projector = std::make_shared<LTXAVTextProjectionRunner>(backend,
|
||||||
params_backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
projector_prefix);
|
projector_prefix,
|
||||||
|
weight_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
@ -2192,11 +2147,6 @@ struct LTXAVEmbedder : public Conditioner {
|
|||||||
projector->set_weight_adapter(adapter);
|
projector->set_weight_adapter(adapter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
llm->set_weight_manager(manager);
|
|
||||||
projector->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
llm->runner_done();
|
llm->runner_done();
|
||||||
projector->runner_done();
|
projector->runner_done();
|
||||||
|
|||||||
@ -1696,11 +1696,9 @@ protected:
|
|||||||
using GraphCutSegment = sd::ggml_graph_cut::Segment;
|
using GraphCutSegment = sd::ggml_graph_cut::Segment;
|
||||||
using GraphCutPlan = sd::ggml_graph_cut::Plan;
|
using GraphCutPlan = sd::ggml_graph_cut::Plan;
|
||||||
|
|
||||||
ggml_backend_t params_backend = nullptr;
|
|
||||||
ggml_backend_t runtime_backend = nullptr;
|
ggml_backend_t runtime_backend = nullptr;
|
||||||
|
|
||||||
ggml_context* params_ctx = nullptr;
|
ggml_context* params_ctx = nullptr;
|
||||||
ggml_backend_buffer_t params_buffer = nullptr;
|
|
||||||
|
|
||||||
ggml_context* cache_ctx = nullptr;
|
ggml_context* cache_ctx = nullptr;
|
||||||
ggml_backend_buffer_t cache_buffer = nullptr;
|
ggml_backend_buffer_t cache_buffer = nullptr;
|
||||||
@ -1880,9 +1878,6 @@ protected:
|
|||||||
auto manager = weight_manager.lock();
|
auto manager = weight_manager.lock();
|
||||||
if (manager == nullptr) {
|
if (manager == nullptr) {
|
||||||
if (!params_to_prepare.empty()) {
|
if (!params_to_prepare.empty()) {
|
||||||
if (params_buffer != nullptr) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
LOG_ERROR("%s weight manager is not set for graph params", get_desc().c_str());
|
LOG_ERROR("%s weight manager is not set for graph params", get_desc().c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2194,13 +2189,11 @@ protected:
|
|||||||
plan.valid &&
|
plan.valid &&
|
||||||
max_graph_vram_bytes > 0 &&
|
max_graph_vram_bytes > 0 &&
|
||||||
plan.segments.size() > 1 &&
|
plan.segments.size() > 1 &&
|
||||||
params_backend != runtime_backend &&
|
|
||||||
!sd_backend_is_cpu(runtime_backend);
|
!sd_backend_is_cpu(runtime_backend);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool can_attempt_graph_cut_segmented_compute() const {
|
bool can_attempt_graph_cut_segmented_compute() const {
|
||||||
return max_graph_vram_bytes > 0 &&
|
return max_graph_vram_bytes > 0 &&
|
||||||
params_backend != runtime_backend &&
|
|
||||||
!sd_backend_is_cpu(runtime_backend);
|
!sd_backend_is_cpu(runtime_backend);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2631,16 +2624,15 @@ public:
|
|||||||
public:
|
public:
|
||||||
virtual std::string get_desc() = 0;
|
virtual std::string get_desc() = 0;
|
||||||
|
|
||||||
GGMLRunner(ggml_backend_t backend, ggml_backend_t params_backend)
|
GGMLRunner(ggml_backend_t backend,
|
||||||
: params_backend(params_backend),
|
std::shared_ptr<RunnerWeightManager> manager = nullptr)
|
||||||
runtime_backend(backend) {
|
: runtime_backend(backend),
|
||||||
|
weight_manager(manager) {
|
||||||
GGML_ASSERT(runtime_backend != nullptr);
|
GGML_ASSERT(runtime_backend != nullptr);
|
||||||
GGML_ASSERT(params_backend != nullptr);
|
|
||||||
alloc_params_ctx();
|
alloc_params_ctx();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~GGMLRunner() {
|
virtual ~GGMLRunner() {
|
||||||
free_params_buffer();
|
|
||||||
free_compute_buffer();
|
free_compute_buffer();
|
||||||
free_params_ctx();
|
free_params_ctx();
|
||||||
free_compute_ctx();
|
free_compute_ctx();
|
||||||
@ -2674,73 +2666,6 @@ public:
|
|||||||
alloc_compute_ctx();
|
alloc_compute_ctx();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool alloc_params_buffer() {
|
|
||||||
size_t num_tensors = ggml_tensor_num(params_ctx);
|
|
||||||
if (num_tensors > 0) {
|
|
||||||
// ggml_backend_alloc_ctx_tensors fails when all tensors are already allocated
|
|
||||||
// (typical for memory-mapped weights). See ggml-alloc.c n_buffers==0 branch.
|
|
||||||
bool all_have_data = true;
|
|
||||||
for (ggml_tensor* t = ggml_get_first_tensor(params_ctx); t != nullptr; t = ggml_get_next_tensor(params_ctx, t)) {
|
|
||||||
if (t->data == nullptr) {
|
|
||||||
all_have_data = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (all_have_data) {
|
|
||||||
LOG_DEBUG("%s all params already mmap-allocated (no separate buffer needed)", get_desc().c_str());
|
|
||||||
params_buffer = nullptr;
|
|
||||||
rebuild_params_tensor_set();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG_DEBUG("%s skipping params allocation (no tensors)", get_desc().c_str());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// Pinned host buffer when CPU-offloaded for DMA-direct H2D.
|
|
||||||
ggml_backend_buffer_type_t params_buft = nullptr;
|
|
||||||
if (params_backend != runtime_backend) {
|
|
||||||
ggml_backend_dev_t runtime_dev = ggml_backend_get_device(runtime_backend);
|
|
||||||
if (runtime_dev != nullptr) {
|
|
||||||
params_buft = ggml_backend_dev_host_buffer_type(runtime_dev);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (params_buft == nullptr) {
|
|
||||||
params_buft = ggml_backend_get_default_buffer_type(params_backend);
|
|
||||||
}
|
|
||||||
params_buffer = ggml_backend_alloc_ctx_tensors_from_buft(params_ctx, params_buft);
|
|
||||||
if (params_buffer == nullptr) {
|
|
||||||
LOG_ERROR("%s alloc params backend buffer failed, num_tensors = %i",
|
|
||||||
get_desc().c_str(),
|
|
||||||
num_tensors);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
rebuild_params_tensor_set();
|
|
||||||
ggml_backend_buffer_set_usage(params_buffer, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
|
||||||
size_t params_buffer_size = ggml_backend_buffer_get_size(params_buffer);
|
|
||||||
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
|
|
||||||
get_desc().c_str(),
|
|
||||||
params_buffer_size / (1024.f * 1024.f),
|
|
||||||
sd_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
|
|
||||||
num_tensors);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
void free_params_buffer() {
|
|
||||||
if (params_buffer != nullptr) {
|
|
||||||
ggml_backend_buffer_free(params_buffer);
|
|
||||||
params_buffer = nullptr;
|
|
||||||
}
|
|
||||||
observed_max_effective_budget_ = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t get_params_buffer_size() {
|
|
||||||
if (params_buffer != nullptr) {
|
|
||||||
return ggml_backend_buffer_get_size(params_buffer);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void free_cache_ctx_and_buffer() {
|
void free_cache_ctx_and_buffer() {
|
||||||
free_cache_buffer();
|
free_cache_buffer();
|
||||||
@ -2886,15 +2811,6 @@ public:
|
|||||||
weight_adapter = adapter;
|
weight_adapter = adapter;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) {
|
|
||||||
weight_manager = manager;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager,
|
|
||||||
const std::string&) {
|
|
||||||
set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_max_graph_vram_bytes(size_t max_vram_bytes) {
|
void set_max_graph_vram_bytes(size_t max_vram_bytes) {
|
||||||
max_graph_vram_bytes = max_vram_bytes;
|
max_graph_vram_bytes = max_vram_bytes;
|
||||||
}
|
}
|
||||||
@ -2902,14 +2818,6 @@ public:
|
|||||||
void set_stream_layers_enabled(bool enabled) {
|
void set_stream_layers_enabled(bool enabled) {
|
||||||
stream_layers_enabled = enabled;
|
stream_layers_enabled = enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_t get_runtime_backend() {
|
|
||||||
return runtime_backend;
|
|
||||||
}
|
|
||||||
|
|
||||||
ggml_backend_t get_params_backend() {
|
|
||||||
return params_backend;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class GGMLBlock {
|
class GGMLBlock {
|
||||||
|
|||||||
@ -19,6 +19,7 @@ struct GenerationExtensionInitContext {
|
|||||||
SDVersion version;
|
SDVersion version;
|
||||||
const String2TensorStorage& tensor_storage_map;
|
const String2TensorStorage& tensor_storage_map;
|
||||||
ModelLoader& model_loader;
|
ModelLoader& model_loader;
|
||||||
|
std::shared_ptr<ModelManager> model_manager;
|
||||||
int n_threads;
|
int n_threads;
|
||||||
std::function<bool(SDBackendModule)> ensure_backend_pair;
|
std::function<bool(SDBackendModule)> ensure_backend_pair;
|
||||||
std::function<ggml_backend_t(SDBackendModule)> backend_for;
|
std::function<ggml_backend_t(SDBackendModule)> backend_for;
|
||||||
@ -46,7 +47,6 @@ struct GenerationExtension {
|
|||||||
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>&) {}
|
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>&) {}
|
||||||
virtual void collect_loras(std::vector<ModelManager::LoraSpec>&) {}
|
virtual void collect_loras(std::vector<ModelManager::LoraSpec>&) {}
|
||||||
virtual void add_ignore_tensors(std::set<std::string>&) const {}
|
virtual void add_ignore_tensors(std::set<std::string>&) const {}
|
||||||
virtual void set_weight_manager(const std::shared_ptr<RunnerWeightManager>&) {}
|
|
||||||
virtual void runner_done() {}
|
virtual void runner_done() {}
|
||||||
virtual void reset_runtime_condition() {}
|
virtual void reset_runtime_condition() {}
|
||||||
virtual bool prepare_condition(GenerationExtensionConditionContext&) {
|
virtual bool prepare_condition(GenerationExtensionConditionContext&) {
|
||||||
|
|||||||
@ -134,11 +134,12 @@ struct PhotoMakerExtension : public GenerationExtension {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pmid_model = std::make_shared<PhotoMakerIDEncoder>(ctx.backend_for(SDBackendModule::PHOTOMAKER),
|
pmid_model = std::make_shared<PhotoMakerIDEncoder>(ctx.backend_for(SDBackendModule::PHOTOMAKER),
|
||||||
ctx.params_backend_for(SDBackendModule::PHOTOMAKER),
|
|
||||||
ctx.tensor_storage_map,
|
ctx.tensor_storage_map,
|
||||||
"pmid",
|
"pmid",
|
||||||
ctx.version,
|
ctx.version,
|
||||||
pm_version);
|
pm_version,
|
||||||
|
20.f,
|
||||||
|
ctx.model_manager);
|
||||||
if (pm_version == PM_VERSION_2) {
|
if (pm_version == PM_VERSION_2) {
|
||||||
LOG_INFO("using PhotoMaker Version 2");
|
LOG_INFO("using PhotoMaker Version 2");
|
||||||
}
|
}
|
||||||
@ -174,12 +175,6 @@ struct PhotoMakerExtension : public GenerationExtension {
|
|||||||
ignore_tensors.insert("pmid.unet.");
|
ignore_tensors.insert("pmid.unet.");
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
if (pmid_model != nullptr) {
|
|
||||||
pmid_model->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
if (pmid_model != nullptr) {
|
if (pmid_model != nullptr) {
|
||||||
pmid_model->runner_done();
|
pmid_model->runner_done();
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include "core/ggml_extend.hpp"
|
#include "core/ggml_extend.hpp"
|
||||||
#include "model_loader.h"
|
#include "model_loader.h"
|
||||||
|
#include "model_manager.h"
|
||||||
|
|
||||||
#define LORA_GRAPH_BASE_SIZE 10240
|
#define LORA_GRAPH_BASE_SIZE 10240
|
||||||
|
|
||||||
@ -14,7 +15,8 @@ struct LoraModel : public GGMLRunner {
|
|||||||
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
|
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
|
||||||
std::set<std::string> applied_lora_tensors;
|
std::set<std::string> applied_lora_tensors;
|
||||||
std::string file_path;
|
std::string file_path;
|
||||||
ModelLoader model_loader;
|
std::shared_ptr<ModelManager> model_manager;
|
||||||
|
ggml_backend_t params_backend = nullptr;
|
||||||
bool load_failed = false;
|
bool load_failed = false;
|
||||||
bool applied = false;
|
bool applied = false;
|
||||||
bool tensor_preprocessed = false;
|
bool tensor_preprocessed = false;
|
||||||
@ -23,13 +25,14 @@ struct LoraModel : public GGMLRunner {
|
|||||||
|
|
||||||
LoraModel(const std::string& lora_id,
|
LoraModel(const std::string& lora_id,
|
||||||
ggml_backend_t backend,
|
ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
ggml_backend_t params_backend_,
|
||||||
const std::string& file_path = "",
|
const std::string& file_path = "",
|
||||||
std::string prefix = "",
|
std::string prefix = "",
|
||||||
SDVersion version = VERSION_COUNT)
|
SDVersion version = VERSION_COUNT,
|
||||||
: lora_id(lora_id), file_path(file_path), GGMLRunner(backend, params_backend) {
|
std::shared_ptr<ModelManager> manager = std::make_shared<ModelManager>())
|
||||||
|
: GGMLRunner(backend, manager), lora_id(lora_id), file_path(file_path), model_manager(std::move(manager)), params_backend(params_backend_) {
|
||||||
prefix = "lora." + prefix;
|
prefix = "lora." + prefix;
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, prefix, version)) {
|
if (model_manager == nullptr || !model_manager->loader().init_from_file_and_convert_name(file_path, prefix, version)) {
|
||||||
load_failed = true;
|
load_failed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -71,7 +74,10 @@ struct LoraModel : public GGMLRunner {
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
model_loader.set_n_threads(n_threads);
|
if (model_manager != nullptr) {
|
||||||
|
model_manager->set_n_threads(n_threads);
|
||||||
|
}
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
model_loader.load_tensors(on_new_tensor_cb);
|
model_loader.load_tensors(on_new_tensor_cb);
|
||||||
|
|
||||||
if (tensors_to_create.empty()) {
|
if (tensors_to_create.empty()) {
|
||||||
@ -88,23 +94,42 @@ struct LoraModel : public GGMLRunner {
|
|||||||
lora_tensors[name] = real;
|
lora_tensors[name] = real;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!alloc_params_buffer()) {
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
LOG_ERROR("lora model buffer allocation failed");
|
for (const auto& pair : lora_tensors) {
|
||||||
|
tensors[pair.first] = pair.second;
|
||||||
|
}
|
||||||
|
if (model_manager == nullptr ||
|
||||||
|
!model_manager->register_param_tensors("LoRA",
|
||||||
|
std::move(tensors),
|
||||||
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
runtime_backend,
|
||||||
|
params_backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("lora model manager registration failed");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::vector<ggml_tensor*> lora_params;
|
||||||
|
lora_params.reserve(lora_tensors.size());
|
||||||
|
for (const auto& pair : lora_tensors) {
|
||||||
|
lora_params.push_back(pair.second);
|
||||||
|
}
|
||||||
|
if (!model_manager->prepare_params(lora_params)) {
|
||||||
|
LOG_ERROR("lora model manager prepare params failed");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
dry_run = false;
|
|
||||||
model_loader.load_tensors(on_new_tensor_cb);
|
|
||||||
|
|
||||||
LOG_DEBUG("finished loaded lora");
|
LOG_DEBUG("finished loaded lora");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void release_loaded_tensors() {
|
void release_loaded_tensors() {
|
||||||
|
runner_done();
|
||||||
free_compute_buffer();
|
free_compute_buffer();
|
||||||
free_params_buffer();
|
model_manager.reset();
|
||||||
free_params_ctx();
|
free_params_ctx();
|
||||||
alloc_params_ctx();
|
alloc_params_ctx();
|
||||||
|
model_manager = std::make_shared<ModelManager>();
|
||||||
|
weight_manager = model_manager;
|
||||||
lora_tensors.clear();
|
lora_tensors.clear();
|
||||||
original_tensor_to_final_tensor.clear();
|
original_tensor_to_final_tensor.clear();
|
||||||
applied_lora_tensors.clear();
|
applied_lora_tensors.clear();
|
||||||
|
|||||||
@ -413,13 +413,13 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
PhotoMakerIDEncoder(ggml_backend_t backend,
|
PhotoMakerIDEncoder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
SDVersion version = VERSION_SDXL,
|
SDVersion version = VERSION_SDXL,
|
||||||
PMVersion pm_v = PM_VERSION_1,
|
PMVersion pm_v = PM_VERSION_1,
|
||||||
float sty = 20.f)
|
float sty = 20.f,
|
||||||
: GGMLRunner(backend, params_backend),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager),
|
||||||
version(version),
|
version(version),
|
||||||
pm_version(pm_v),
|
pm_version(pm_v),
|
||||||
style_strength(sty) {
|
style_strength(sty) {
|
||||||
@ -565,17 +565,18 @@ public:
|
|||||||
struct PhotoMakerIDEmbed : public GGMLRunner {
|
struct PhotoMakerIDEmbed : public GGMLRunner {
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
std::string file_path;
|
std::string file_path;
|
||||||
ModelLoader* model_loader;
|
std::shared_ptr<ModelManager> model_manager;
|
||||||
|
ggml_backend_t params_backend = nullptr;
|
||||||
bool load_failed = false;
|
bool load_failed = false;
|
||||||
bool applied = false;
|
bool applied = false;
|
||||||
|
|
||||||
PhotoMakerIDEmbed(ggml_backend_t backend,
|
PhotoMakerIDEmbed(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
ggml_backend_t params_backend_,
|
||||||
ModelLoader* ml,
|
std::shared_ptr<ModelManager> manager = std::make_shared<ModelManager>(),
|
||||||
const std::string& file_path = "",
|
const std::string& file_path = "",
|
||||||
const std::string& prefix = "")
|
const std::string& prefix = "")
|
||||||
: file_path(file_path), GGMLRunner(backend, params_backend), model_loader(ml) {
|
: GGMLRunner(backend, manager), file_path(file_path), model_manager(std::move(manager)), params_backend(params_backend_) {
|
||||||
if (!model_loader->init_from_file_and_convert_name(file_path, prefix)) {
|
if (model_manager == nullptr || !model_manager->loader().init_from_file_and_convert_name(file_path, prefix)) {
|
||||||
load_failed = true;
|
load_failed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -616,15 +617,27 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
model_loader->set_n_threads(n_threads);
|
model_manager->set_n_threads(n_threads);
|
||||||
model_loader->load_tensors(on_new_tensor_cb);
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!alloc_params_buffer()) {
|
model_loader.load_tensors(on_new_tensor_cb);
|
||||||
LOG_ERROR("PhotoMaker ID embeds buffer allocation failed");
|
if (!model_manager->register_param_tensors("PhotoMaker ID embeds",
|
||||||
|
tensors,
|
||||||
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
runtime_backend,
|
||||||
|
params_backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("PhotoMaker ID embeds model manager registration failed");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::vector<ggml_tensor*> id_embed_params;
|
||||||
|
id_embed_params.reserve(tensors.size());
|
||||||
|
for (const auto& pair : tensors) {
|
||||||
|
id_embed_params.push_back(pair.second);
|
||||||
|
}
|
||||||
|
if (!model_manager->prepare_params(id_embed_params)) {
|
||||||
|
LOG_ERROR("PhotoMaker ID embeds model manager prepare params failed");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
dry_run = false;
|
|
||||||
model_loader->load_tensors(on_new_tensor_cb);
|
|
||||||
|
|
||||||
LOG_DEBUG("finished loading PhotoMaker ID Embeds ");
|
LOG_DEBUG("finished loading PhotoMaker ID Embeds ");
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@ -560,11 +560,11 @@ protected:
|
|||||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float get_alpha() {
|
ggml_tensor* get_alpha(GGMLRunnerContext* ctx) {
|
||||||
// image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,]
|
// image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,]
|
||||||
// so learned_with_images is same as learned
|
// so learned_with_images is same as learned
|
||||||
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]);
|
auto mix_factor = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["mix_factor"]);
|
||||||
return sigmoid(alpha);
|
return ggml_sigmoid(ctx->ggml_ctx, mix_factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -578,11 +578,12 @@ public:
|
|||||||
ggml_tensor* x_spatial,
|
ggml_tensor* x_spatial,
|
||||||
ggml_tensor* x_temporal) {
|
ggml_tensor* x_temporal) {
|
||||||
// image_only_indicator is always tensor([0.])
|
// image_only_indicator is always tensor([0.])
|
||||||
float alpha = get_alpha();
|
auto alpha = get_alpha(ctx);
|
||||||
auto x = ggml_add(ctx->ggml_ctx,
|
return ggml_add(ctx->ggml_ctx,
|
||||||
ggml_ext_scale(ctx->ggml_ctx, x_spatial, alpha),
|
x_temporal,
|
||||||
ggml_ext_scale(ctx->ggml_ctx, x_temporal, 1.0f - alpha));
|
ggml_mul(ctx->ggml_ctx,
|
||||||
return x;
|
ggml_sub(ctx->ggml_ctx, x_spatial, x_temporal),
|
||||||
|
alpha));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -561,10 +561,10 @@ namespace Anima {
|
|||||||
AnimaNet net;
|
AnimaNet net;
|
||||||
|
|
||||||
AnimaRunner(ggml_backend_t backend,
|
AnimaRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "model.diffusion_model")
|
const std::string prefix = "model.diffusion_model",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(AnimaConfig::detect_from_weights(tensor_storage_map, prefix + ".net")) {
|
config(AnimaConfig::detect_from_weights(tensor_storage_map, prefix + ".net")) {
|
||||||
net = AnimaNet(config);
|
net = AnimaNet(config);
|
||||||
net.init(params_ctx, tensor_storage_map, prefix + ".net");
|
net.init(params_ctx, tensor_storage_map, prefix + ".net");
|
||||||
|
|||||||
@ -1,8 +1,9 @@
|
|||||||
#ifndef __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
#ifndef __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
||||||
#define __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
#define __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
||||||
|
|
||||||
#include "model/common/block.hpp"
|
#include "model/common/block.hpp"
|
||||||
#include "model_loader.h"
|
#include "model_loader.h"
|
||||||
|
#include "model_manager.h"
|
||||||
|
|
||||||
#define CONTROL_NET_GRAPH_SIZE 1536
|
#define CONTROL_NET_GRAPH_SIZE 1536
|
||||||
|
|
||||||
@ -318,13 +319,16 @@ struct ControlNet : public GGMLRunner {
|
|||||||
std::vector<sd::Tensor<float>> controls;
|
std::vector<sd::Tensor<float>> controls;
|
||||||
sd::Tensor<float> guided_hint;
|
sd::Tensor<float> guided_hint;
|
||||||
bool guided_hint_cached = false;
|
bool guided_hint_cached = false;
|
||||||
|
std::shared_ptr<ModelManager> owned_model_manager;
|
||||||
|
ggml_backend_t params_backend = nullptr;
|
||||||
|
|
||||||
ControlNet(ggml_backend_t backend,
|
ControlNet(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
ggml_backend_t params_backend_,
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
SDVersion version = VERSION_SD1,
|
SDVersion version = VERSION_SD1,
|
||||||
const std::string& prefix = "")
|
const std::string& prefix = "",
|
||||||
: GGMLRunner(backend, params_backend), version(version), control_net(version), weight_prefix(prefix) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager), version(version), control_net(version), weight_prefix(prefix), params_backend(params_backend_) {
|
||||||
control_net.init(params_ctx, tensor_storage_map, prefix);
|
control_net.init(params_ctx, tensor_storage_map, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -459,31 +463,35 @@ struct ControlNet : public GGMLRunner {
|
|||||||
|
|
||||||
bool load_from_file(const std::string& file_path, int n_threads) {
|
bool load_from_file(const std::string& file_path, int n_threads) {
|
||||||
LOG_INFO("loading control net from '%s'", file_path.c_str());
|
LOG_INFO("loading control net from '%s'", file_path.c_str());
|
||||||
if (!alloc_params_buffer()) {
|
|
||||||
LOG_ERROR("control net model buffer allocation failed");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
control_net.get_param_tensors(tensors);
|
control_net.get_param_tensors(tensors);
|
||||||
std::set<std::string> ignore_tensors;
|
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto manager = std::dynamic_pointer_cast<ModelManager>(weight_manager.lock());
|
||||||
|
if (manager == nullptr) {
|
||||||
|
owned_model_manager = std::make_shared<ModelManager>();
|
||||||
|
weight_manager = owned_model_manager;
|
||||||
|
manager = owned_model_manager;
|
||||||
|
}
|
||||||
|
|
||||||
|
ModelLoader& model_loader = manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
||||||
LOG_ERROR("init control net model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init control net model loader from file failed: '%s'", file_path.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
model_loader.set_n_threads(n_threads);
|
manager->set_n_threads(n_threads);
|
||||||
bool success = model_loader.load_tensors(tensors, ignore_tensors);
|
if (!manager->register_param_tensors("ControlNet",
|
||||||
|
std::move(tensors),
|
||||||
if (!success) {
|
ModelManager::ResidencyMode::Resident,
|
||||||
LOG_ERROR("load control net tensors from model loader failed");
|
runtime_backend,
|
||||||
|
params_backend) ||
|
||||||
|
!manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register control net tensors with model manager failed");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO("control net model loaded");
|
LOG_INFO("control net model loaded");
|
||||||
return success;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -387,10 +387,10 @@ namespace ErnieImage {
|
|||||||
std::vector<float> pe_vec;
|
std::vector<float> pe_vec;
|
||||||
|
|
||||||
ErnieImageRunner(ggml_backend_t backend,
|
ErnieImageRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "")
|
const std::string prefix = "",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(ErnieImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
config(ErnieImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
ernie_image = ErnieImageModel(config);
|
ernie_image = ErnieImageModel(config);
|
||||||
ernie_image.init(params_ctx, tensor_storage_map, prefix);
|
ernie_image.init(params_ctx, tensor_storage_map, prefix);
|
||||||
|
|||||||
@ -1301,12 +1301,12 @@ namespace Flux {
|
|||||||
bool use_mask = false;
|
bool use_mask = false;
|
||||||
|
|
||||||
FluxRunner(ggml_backend_t backend,
|
FluxRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
SDVersion version = VERSION_FLUX,
|
SDVersion version = VERSION_FLUX,
|
||||||
bool use_mask = false)
|
bool use_mask = false,
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(FluxConfig::detect_from_weights(tensor_storage_map, prefix, version)),
|
config(FluxConfig::detect_from_weights(tensor_storage_map, prefix, version)),
|
||||||
version(version),
|
version(version),
|
||||||
use_mask(use_mask) {
|
use_mask(use_mask) {
|
||||||
@ -1583,7 +1583,8 @@ namespace Flux {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_COUNT;
|
ggml_type model_data_type = GGML_TYPE_COUNT;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -1599,24 +1600,20 @@ namespace Flux {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<FluxRunner> flux = std::make_shared<FluxRunner>(backend,
|
std::shared_ptr<FluxRunner> flux = std::make_shared<FluxRunner>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
VERSION_FLUX2,
|
VERSION_FLUX2,
|
||||||
false);
|
false,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!flux->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("Flux test",
|
||||||
LOG_ERROR("flux model allocation failed");
|
*flux,
|
||||||
return;
|
"model.diffusion_model",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
backend,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend) ||
|
||||||
flux->get_param_tensors(tensors, "model.diffusion_model");
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register flux tensors with model manager failed");
|
||||||
bool success = model_loader.load_tensors(tensors);
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
#ifndef __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
||||||
#define __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
#define __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -282,10 +282,10 @@ namespace HiDreamO1 {
|
|||||||
std::array<std::vector<float>, 4> pos_embed_weight_data_;
|
std::array<std::vector<float>, 4> pos_embed_weight_data_;
|
||||||
|
|
||||||
HiDreamO1VisionRunner(ggml_backend_t backend,
|
HiDreamO1VisionRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string& prefix = "model.visual")
|
const std::string& prefix = "model.visual",
|
||||||
: GGMLRunner(backend, params_backend),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager),
|
||||||
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)),
|
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)),
|
||||||
model(std::make_shared<LLM::VisionModel>(false, config.llm.vision)) {
|
model(std::make_shared<LLM::VisionModel>(false, config.llm.vision)) {
|
||||||
model->init(params_ctx, tensor_storage_map, prefix);
|
model->init(params_ctx, tensor_storage_map, prefix);
|
||||||
@ -343,10 +343,10 @@ namespace HiDreamO1 {
|
|||||||
std::vector<float> attention_mask_vec;
|
std::vector<float> attention_mask_vec;
|
||||||
|
|
||||||
HiDreamO1Runner(ggml_backend_t backend,
|
HiDreamO1Runner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string& prefix = "model")
|
const std::string& prefix = "model",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)) {
|
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
model = HiDreamO1Model(config);
|
model = HiDreamO1Model(config);
|
||||||
model.init(params_ctx, tensor_storage_map, prefix);
|
model.init(params_ctx, tensor_storage_map, prefix);
|
||||||
@ -490,9 +490,9 @@ namespace HiDreamO1 {
|
|||||||
std::shared_ptr<HiDreamO1VisionRunner> vision_runner;
|
std::shared_ptr<HiDreamO1VisionRunner> vision_runner;
|
||||||
|
|
||||||
HiDreamO1Conditioner(ggml_backend_t backend,
|
HiDreamO1Conditioner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const String2TensorStorage& tensor_storage_map = {})
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: vision_runner(std::make_shared<HiDreamO1VisionRunner>(backend, params_backend, tensor_storage_map)) {}
|
: vision_runner(std::make_shared<HiDreamO1VisionRunner>(backend, tensor_storage_map, "model.visual", weight_manager)) {}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
vision_runner->get_param_tensors(tensors);
|
vision_runner->get_param_tensors(tensors);
|
||||||
@ -510,10 +510,6 @@ namespace HiDreamO1 {
|
|||||||
vision_runner->set_weight_adapter(adapter);
|
vision_runner->set_weight_adapter(adapter);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
|
||||||
vision_runner->set_weight_manager(manager);
|
|
||||||
}
|
|
||||||
|
|
||||||
void runner_done() override {
|
void runner_done() override {
|
||||||
vision_runner->runner_done();
|
vision_runner->runner_done();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -449,10 +449,10 @@ namespace Ideogram4 {
|
|||||||
std::vector<int32_t> image_indicator_vec;
|
std::vector<int32_t> image_indicator_vec;
|
||||||
|
|
||||||
Ideogram4Runner(ggml_backend_t backend,
|
Ideogram4Runner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "")
|
const std::string prefix = "",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(Ideogram4Config::detect_from_weights(tensor_storage_map, prefix)),
|
config(Ideogram4Config::detect_from_weights(tensor_storage_map, prefix)),
|
||||||
uncond_prefix(prefix + ".uncond") {
|
uncond_prefix(prefix + ".uncond") {
|
||||||
model = Ideogram4Transformer(config);
|
model = Ideogram4Transformer(config);
|
||||||
|
|||||||
@ -356,10 +356,10 @@ namespace Lens {
|
|||||||
std::vector<float> pe_vec;
|
std::vector<float> pe_vec;
|
||||||
|
|
||||||
LensRunner(ggml_backend_t backend,
|
LensRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "")
|
const std::string prefix = "",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(LensConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
config(LensConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
lens = LensModel(config);
|
lens = LensModel(config);
|
||||||
lens.init(params_ctx, tensor_storage_map, prefix);
|
lens.init(params_ctx, tensor_storage_map, prefix);
|
||||||
|
|||||||
@ -1686,10 +1686,10 @@ namespace LTXV {
|
|||||||
sd::Tensor<float> ax_input_cache;
|
sd::Tensor<float> ax_input_cache;
|
||||||
|
|
||||||
LTXAVRunner(ggml_backend_t backend,
|
LTXAVRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string& prefix = "model.diffusion_model")
|
const std::string& prefix = "model.diffusion_model",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(LTXAVConfig::detect_from_weights(tensor_storage_map, prefix)),
|
config(LTXAVConfig::detect_from_weights(tensor_storage_map, prefix)),
|
||||||
model(config) {
|
model(config) {
|
||||||
model.init(params_ctx, tensor_storage_map, prefix);
|
model.init(params_ctx, tensor_storage_map, prefix);
|
||||||
@ -2025,7 +2025,8 @@ namespace LTXV {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
LOG_INFO("loading ltxav from '%s'", model_path.c_str());
|
LOG_INFO("loading ltxav from '%s'", model_path.c_str());
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(model_path, "model.diffusion_model.")) {
|
if (!model_loader.init_from_file_and_convert_name(model_path, "model.diffusion_model.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -2040,19 +2041,18 @@ namespace LTXV {
|
|||||||
|
|
||||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||||
std::shared_ptr<LTXAVRunner> ltxav = std::make_shared<LTXAVRunner>(backend,
|
std::shared_ptr<LTXAVRunner> ltxav = std::make_shared<LTXAVRunner>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model");
|
"model.diffusion_model",
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!ltxav->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("LTXAV test",
|
||||||
LOG_ERROR("ltxav buffer allocation failed");
|
*ltxav,
|
||||||
return;
|
"model.diffusion_model",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend,
|
||||||
ltxav->get_param_tensors(tensors, "model.diffusion_model");
|
backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
if (!model_loader.load_tensors(tensors)) {
|
LOG_ERROR("register ltxav tensors with model manager failed");
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -879,10 +879,10 @@ struct MMDiTRunner : public DiffusionModelRunner {
|
|||||||
MMDiT mmdit;
|
MMDiT mmdit;
|
||||||
|
|
||||||
MMDiTRunner(ggml_backend_t backend,
|
MMDiTRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "")
|
const std::string prefix = "",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(MMDiTConfig::detect_from_weights(tensor_storage_map, prefix)),
|
config(MMDiTConfig::detect_from_weights(tensor_storage_map, prefix)),
|
||||||
mmdit(config) {
|
mmdit(config) {
|
||||||
mmdit.init(params_ctx, tensor_storage_map, prefix);
|
mmdit.init(params_ctx, tensor_storage_map, prefix);
|
||||||
@ -1001,28 +1001,25 @@ struct MMDiTRunner : public DiffusionModelRunner {
|
|||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, backend);
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, String2TensorStorage{}, "", model_manager);
|
||||||
{
|
{
|
||||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||||
|
|
||||||
if (!mmdit->alloc_params_buffer()) {
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
LOG_ERROR("mmdit embeds buffer allocation failed");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
|
||||||
mmdit->get_param_tensors(tensors, "model.diffusion_model");
|
|
||||||
|
|
||||||
ModelLoader model_loader;
|
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool success = model_loader.load_tensors(tensors);
|
if (!model_manager->register_runner_params("MMDiT test",
|
||||||
|
*mmdit,
|
||||||
if (!success) {
|
"model.diffusion_model",
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
backend,
|
||||||
|
backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register mmdit tensors with model manager failed");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_DIFFUSION_MODEL_HPP__
|
#ifndef __SD_MODEL_DIFFUSION_MODEL_HPP__
|
||||||
#define __SD_MODEL_DIFFUSION_MODEL_HPP__
|
#define __SD_MODEL_DIFFUSION_MODEL_HPP__
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "core/ggml_extend.hpp"
|
#include "core/ggml_extend.hpp"
|
||||||
#include "core/tensor_ggml.hpp"
|
#include "core/tensor_ggml.hpp"
|
||||||
|
#include "model_manager.h"
|
||||||
|
|
||||||
struct UNetDiffusionExtra {
|
struct UNetDiffusionExtra {
|
||||||
int num_video_frames = -1;
|
int num_video_frames = -1;
|
||||||
@ -88,9 +89,9 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
DiffusionModelRunner(ggml_backend_t backend,
|
DiffusionModelRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const std::string& prefix,
|
||||||
const std::string& prefix)
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: GGMLRunner(backend, params_backend),
|
: GGMLRunner(backend, weight_manager),
|
||||||
prefix(prefix) {}
|
prefix(prefix) {}
|
||||||
|
|
||||||
virtual sd::Tensor<float> compute(int n_threads,
|
virtual sd::Tensor<float> compute(int n_threads,
|
||||||
|
|||||||
@ -710,10 +710,10 @@ namespace Pid {
|
|||||||
std::vector<float> pixel_pos_comp_vec;
|
std::vector<float> pixel_pos_comp_vec;
|
||||||
|
|
||||||
PiDRunner(ggml_backend_t backend,
|
PiDRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix = "model.diffusion_model")
|
const std::string prefix = "model.diffusion_model",
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(PixelDiTConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
config(PixelDiTConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
model = PixelDiT(config);
|
model = PixelDiT(config);
|
||||||
model.init(params_ctx, tensor_storage_map, prefix);
|
model.init(params_ctx, tensor_storage_map, prefix);
|
||||||
|
|||||||
@ -518,12 +518,12 @@ namespace Qwen {
|
|||||||
SDVersion version;
|
SDVersion version;
|
||||||
|
|
||||||
QwenImageRunner(ggml_backend_t backend,
|
QwenImageRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
SDVersion version = VERSION_QWEN_IMAGE,
|
SDVersion version = VERSION_QWEN_IMAGE,
|
||||||
bool zero_cond_t = false)
|
bool zero_cond_t = false,
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(QwenImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
config(QwenImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
config.zero_cond_t = config.zero_cond_t || zero_cond_t;
|
config.zero_cond_t = config.zero_cond_t || zero_cond_t;
|
||||||
qwen_image = QwenImageModel(config);
|
qwen_image = QwenImageModel(config);
|
||||||
@ -691,7 +691,8 @@ namespace Qwen {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -705,23 +706,20 @@ namespace Qwen {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<QwenImageRunner> qwen_image = std::make_shared<QwenImageRunner>(backend,
|
std::shared_ptr<QwenImageRunner> qwen_image = std::make_shared<QwenImageRunner>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
VERSION_QWEN_IMAGE);
|
VERSION_QWEN_IMAGE,
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!qwen_image->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("Qwen image test",
|
||||||
LOG_ERROR("qwen_image buffer allocation failed");
|
*qwen_image,
|
||||||
return;
|
"model.diffusion_model",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
backend,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend) ||
|
||||||
qwen_image->get_param_tensors(tensors, "model.diffusion_model");
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register qwen_image tensors with model manager failed");
|
||||||
bool success = model_loader.load_tensors(tensors);
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -694,11 +694,11 @@ struct UNetModelRunner : public DiffusionModelRunner {
|
|||||||
UnetModelBlock unet;
|
UnetModelBlock unet;
|
||||||
|
|
||||||
UNetModelRunner(ggml_backend_t backend,
|
UNetModelRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
SDVersion version = VERSION_SD1)
|
SDVersion version = VERSION_SD1,
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(UNetConfig::detect_from_weights(tensor_storage_map, prefix, version)),
|
config(UNetConfig::detect_from_weights(tensor_storage_map, prefix, version)),
|
||||||
unet(config) {
|
unet(config) {
|
||||||
unet.init(params_ctx, tensor_storage_map, prefix);
|
unet.init(params_ctx, tensor_storage_map, prefix);
|
||||||
|
|||||||
@ -799,11 +799,11 @@ namespace WAN {
|
|||||||
SDVersion version;
|
SDVersion version;
|
||||||
|
|
||||||
WanRunner(ggml_backend_t backend,
|
WanRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
SDVersion version = VERSION_WAN2)
|
SDVersion version = VERSION_WAN2,
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(WanConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
config(WanConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
if (config.num_layers == 30) {
|
if (config.num_layers == 30) {
|
||||||
if (version == VERSION_WAN2_2_TI2V) {
|
if (version == VERSION_WAN2_2_TI2V) {
|
||||||
@ -1017,7 +1017,8 @@ namespace WAN {
|
|||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -1031,23 +1032,19 @@ namespace WAN {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<WanRunner> wan = std::make_shared<WanRunner>(backend,
|
std::shared_ptr<WanRunner> wan = std::make_shared<WanRunner>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
VERSION_WAN2_2_TI2V);
|
VERSION_WAN2_2_TI2V,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!wan->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("Wan test",
|
||||||
LOG_ERROR("wan buffer allocation failed");
|
*wan,
|
||||||
return;
|
"model.diffusion_model",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
backend,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend) ||
|
||||||
wan->get_param_tensors(tensors, "model.diffusion_model");
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register wan tensors with model manager failed");
|
||||||
bool success = model_loader.load_tensors(tensors);
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -553,11 +553,11 @@ namespace ZImage {
|
|||||||
SDVersion version;
|
SDVersion version;
|
||||||
|
|
||||||
ZImageRunner(ggml_backend_t backend,
|
ZImageRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
SDVersion version = VERSION_Z_IMAGE)
|
SDVersion version = VERSION_Z_IMAGE,
|
||||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||||
config(ZImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
config(ZImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||||
z_image = ZImageModel(config);
|
z_image = ZImageModel(config);
|
||||||
z_image.init(params_ctx, tensor_storage_map, prefix);
|
z_image.init(params_ctx, tensor_storage_map, prefix);
|
||||||
@ -698,7 +698,8 @@ namespace ZImage {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -714,22 +715,19 @@ namespace ZImage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<ZImageRunner> z_image = std::make_shared<ZImageRunner>(backend,
|
std::shared_ptr<ZImageRunner> z_image = std::make_shared<ZImageRunner>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
VERSION_QWEN_IMAGE);
|
VERSION_QWEN_IMAGE,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!z_image->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("ZImage test",
|
||||||
LOG_ERROR("z_image buffer allocation failed");
|
*z_image,
|
||||||
return;
|
"model.diffusion_model",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend,
|
||||||
z_image->get_param_tensors(tensors, "model.diffusion_model");
|
backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
bool success = model_loader.load_tensors(tensors);
|
LOG_ERROR("register z_image tensors with model manager failed");
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_TE_CLIP_HPP__
|
#ifndef __SD_MODEL_TE_CLIP_HPP__
|
||||||
#define __SD_MODEL_TE_CLIP_HPP__
|
#define __SD_MODEL_TE_CLIP_HPP__
|
||||||
|
|
||||||
#include "core/ggml_extend.hpp"
|
#include "core/ggml_extend.hpp"
|
||||||
@ -469,13 +469,13 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||||||
std::vector<float> attention_mask_vec;
|
std::vector<float> attention_mask_vec;
|
||||||
|
|
||||||
CLIPTextModelRunner(ggml_backend_t backend,
|
CLIPTextModelRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||||
bool with_final_ln = true,
|
bool with_final_ln = true,
|
||||||
bool force_clip_f32 = false)
|
bool force_clip_f32 = false,
|
||||||
: GGMLRunner(backend, params_backend) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager) {
|
||||||
bool proj_in = false;
|
bool proj_in = false;
|
||||||
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
if (!starts_with(name, prefix)) {
|
if (!starts_with(name, prefix)) {
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_TE_LLM_HPP__
|
#ifndef __SD_MODEL_TE_LLM_HPP__
|
||||||
#define __SD_MODEL_TE_LLM_HPP__
|
#define __SD_MODEL_TE_LLM_HPP__
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -22,6 +22,7 @@
|
|||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
#include "model/common/rope.hpp"
|
#include "model/common/rope.hpp"
|
||||||
#include "model_loader.h"
|
#include "model_loader.h"
|
||||||
|
#include "model_manager.h"
|
||||||
#include "tokenizers/bpe_tokenizer.h"
|
#include "tokenizers/bpe_tokenizer.h"
|
||||||
#include "tokenizers/gemma_tokenizer.h"
|
#include "tokenizers/gemma_tokenizer.h"
|
||||||
#include "tokenizers/gpt_oss_tokenizer.h"
|
#include "tokenizers/gpt_oss_tokenizer.h"
|
||||||
@ -1571,11 +1572,11 @@ namespace LLM {
|
|||||||
public:
|
public:
|
||||||
LLMRunner(LLMArch arch,
|
LLMRunner(LLMArch arch,
|
||||||
ggml_backend_t backend,
|
ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
bool enable_vision_ = false)
|
bool enable_vision_ = false,
|
||||||
: GGMLRunner(backend, params_backend),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager),
|
||||||
config(LLMConfig::detect_from_weights(tensor_storage_map, prefix, arch)),
|
config(LLMConfig::detect_from_weights(tensor_storage_map, prefix, arch)),
|
||||||
enable_vision(enable_vision_) {
|
enable_vision(enable_vision_) {
|
||||||
if (enable_vision && !config.have_vision_weight) {
|
if (enable_vision && !config.have_vision_weight) {
|
||||||
@ -1822,11 +1823,11 @@ namespace LLM {
|
|||||||
|
|
||||||
LLMEmbedder(LLMArch arch,
|
LLMEmbedder(LLMArch arch,
|
||||||
ggml_backend_t backend,
|
ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
bool enable_vision = false)
|
bool enable_vision = false,
|
||||||
: model(arch, backend, params_backend, tensor_storage_map, prefix, enable_vision) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: model(arch, backend, tensor_storage_map, prefix, enable_vision, weight_manager) {
|
||||||
if (arch == LLMArch::MISTRAL_SMALL_3_2 || arch == LLMArch::MINISTRAL_3_3B) {
|
if (arch == LLMArch::MISTRAL_SMALL_3_2 || arch == LLMArch::MINISTRAL_3_3B) {
|
||||||
tokenizer = std::make_shared<MistralTokenizer>();
|
tokenizer = std::make_shared<MistralTokenizer>();
|
||||||
} else if (arch == LLMArch::GPT_OSS_20B) {
|
} else if (arch == LLMArch::GPT_OSS_20B) {
|
||||||
@ -1840,13 +1841,6 @@ namespace LLM {
|
|||||||
model.get_param_tensors(tensors, prefix);
|
model.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool alloc_params_buffer() {
|
|
||||||
if (!model.alloc_params_buffer()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
|
std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
|
||||||
std::pair<int, int> attn_range,
|
std::pair<int, int> attn_range,
|
||||||
size_t max_length = 0,
|
size_t max_length = 0,
|
||||||
@ -2062,7 +2056,8 @@ namespace LLM {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_COUNT;
|
ggml_type model_data_type = GGML_TYPE_COUNT;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, "text_encoders.llm.")) {
|
if (!model_loader.init_from_file_and_convert_name(file_path, "text_encoders.llm.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -2080,24 +2075,20 @@ namespace LLM {
|
|||||||
LLMArch arch = LLMArch::QWEN3;
|
LLMArch arch = LLMArch::QWEN3;
|
||||||
|
|
||||||
std::shared_ptr<LLMEmbedder> llm = std::make_shared<LLMEmbedder>(arch,
|
std::shared_ptr<LLMEmbedder> llm = std::make_shared<LLMEmbedder>(arch,
|
||||||
backend,
|
|
||||||
backend,
|
backend,
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"text_encoders.llm",
|
"text_encoders.llm",
|
||||||
true);
|
true,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!llm->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("LLM test",
|
||||||
LOG_ERROR("llm model allocation failed");
|
*llm,
|
||||||
return;
|
"text_encoders.llm",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
backend,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend) ||
|
||||||
llm->get_param_tensors(tensors, "text_encoders.llm");
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register llm tensors with model manager failed");
|
||||||
bool success = model_loader.load_tensors(tensors);
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_TE_T5_HPP__
|
#ifndef __SD_MODEL_TE_T5_HPP__
|
||||||
#define __SD_MODEL_TE_T5_HPP__
|
#define __SD_MODEL_TE_T5_HPP__
|
||||||
|
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
#include "core/ggml_extend.hpp"
|
#include "core/ggml_extend.hpp"
|
||||||
#include "model_loader.h"
|
#include "model_loader.h"
|
||||||
|
#include "model_manager.h"
|
||||||
#include "tokenizers/t5_unigram_tokenizer.h"
|
#include "tokenizers/t5_unigram_tokenizer.h"
|
||||||
|
|
||||||
struct T5Config {
|
struct T5Config {
|
||||||
@ -334,11 +335,11 @@ struct T5Runner : public GGMLRunner {
|
|||||||
std::vector<int> relative_position_bucket_vec;
|
std::vector<int> relative_position_bucket_vec;
|
||||||
|
|
||||||
T5Runner(ggml_backend_t backend,
|
T5Runner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
bool is_umt5 = false)
|
bool is_umt5 = false,
|
||||||
: GGMLRunner(backend, params_backend),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager),
|
||||||
config(T5Config::detect_from_weights(tensor_storage_map, prefix, is_umt5)) {
|
config(T5Config::detect_from_weights(tensor_storage_map, prefix, is_umt5)) {
|
||||||
model = T5(config);
|
model = T5(config);
|
||||||
model.init(params_ctx, tensor_storage_map, prefix);
|
model.init(params_ctx, tensor_storage_map, prefix);
|
||||||
@ -477,24 +478,17 @@ struct T5Embedder {
|
|||||||
T5Runner model;
|
T5Runner model;
|
||||||
|
|
||||||
T5Embedder(ggml_backend_t backend,
|
T5Embedder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
bool is_umt5 = false)
|
bool is_umt5 = false,
|
||||||
: model(backend, params_backend, tensor_storage_map, prefix, is_umt5), tokenizer(is_umt5) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: model(backend, tensor_storage_map, prefix, is_umt5, weight_manager), tokenizer(is_umt5) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
model.get_param_tensors(tensors, prefix);
|
model.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool alloc_params_buffer() {
|
|
||||||
if (!model.alloc_params_buffer()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
|
std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
|
||||||
size_t max_length = 0,
|
size_t max_length = 0,
|
||||||
bool padding = false) {
|
bool padding = false) {
|
||||||
@ -579,7 +573,8 @@ struct T5Embedder {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -592,19 +587,16 @@ struct T5Embedder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, backend, tensor_storage_map, "", true);
|
std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, tensor_storage_map, "", true, model_manager);
|
||||||
|
|
||||||
if (!t5->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("T5 test",
|
||||||
LOG_ERROR("t5 params buffer allocation failed");
|
*t5,
|
||||||
return;
|
"",
|
||||||
}
|
ModelManager::ResidencyMode::Resident,
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
backend,
|
||||||
t5->get_param_tensors(tensors, "");
|
backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
bool success = model_loader.load_tensors(tensors);
|
LOG_ERROR("register t5 tensors with model manager failed");
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
#ifndef __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
||||||
#define __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
#define __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -229,9 +229,9 @@ struct ESRGAN : public GGMLRunner {
|
|||||||
std::unique_ptr<RRDBNet> rrdb_net;
|
std::unique_ptr<RRDBNet> rrdb_net;
|
||||||
|
|
||||||
ESRGAN(ggml_backend_t backend,
|
ESRGAN(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const String2TensorStorage& tensor_storage_map = {})
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: GGMLRunner(backend, params_backend),
|
: GGMLRunner(backend, weight_manager),
|
||||||
config(ESRGANConfig::detect_from_weights(tensor_storage_map)),
|
config(ESRGANConfig::detect_from_weights(tensor_storage_map)),
|
||||||
rrdb_net(std::make_unique<RRDBNet>(config)) {
|
rrdb_net(std::make_unique<RRDBNet>(config)) {
|
||||||
rrdb_net->init(params_ctx, tensor_storage_map, "");
|
rrdb_net->init(params_ctx, tensor_storage_map, "");
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
#ifndef __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
||||||
#define __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
#define __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -433,9 +433,9 @@ namespace LTXVUpsampler {
|
|||||||
std::unique_ptr<LatentUpsampler> model;
|
std::unique_ptr<LatentUpsampler> model;
|
||||||
|
|
||||||
LatentUpsamplerRunner(ggml_backend_t backend,
|
LatentUpsamplerRunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const String2TensorStorage& tensor_storage_map)
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: GGMLRunner(backend, params_backend),
|
: GGMLRunner(backend, weight_manager),
|
||||||
config(LatentUpsamplerConfig::detect_from_weights(tensor_storage_map)) {
|
config(LatentUpsamplerConfig::detect_from_weights(tensor_storage_map)) {
|
||||||
if (config.dims != 3 || (!config.spatial_upsample && !config.temporal_upsample) ||
|
if (config.dims != 3 || (!config.spatial_upsample && !config.temporal_upsample) ||
|
||||||
config.spatial_up_num < 1 || config.spatial_down_den < 1 || config.temporal_up_factor < 1) {
|
config.spatial_up_num < 1 || config.spatial_down_den < 1 || config.temporal_up_factor < 1) {
|
||||||
|
|||||||
@ -213,9 +213,9 @@ protected:
|
|||||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float get_alpha() {
|
ggml_tensor* get_alpha(GGMLRunnerContext* ctx) {
|
||||||
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]);
|
auto mix_factor = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["mix_factor"]);
|
||||||
return sigmoid(alpha);
|
return ggml_sigmoid(ctx->ggml_ctx, mix_factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -250,10 +250,12 @@ public:
|
|||||||
|
|
||||||
x = time_stack->forward(ctx, x); // b t c (h w)
|
x = time_stack->forward(ctx, x); // b t c (h w)
|
||||||
|
|
||||||
float alpha = get_alpha();
|
auto alpha = get_alpha(ctx);
|
||||||
x = ggml_add(ctx->ggml_ctx,
|
x = ggml_add(ctx->ggml_ctx,
|
||||||
ggml_ext_scale(ctx->ggml_ctx, x, alpha),
|
x_mix,
|
||||||
ggml_ext_scale(ctx->ggml_ctx, x_mix, 1.0f - alpha));
|
ggml_mul(ctx->ggml_ctx,
|
||||||
|
ggml_sub(ctx->ggml_ctx, x, x_mix),
|
||||||
|
alpha));
|
||||||
|
|
||||||
x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w)
|
x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w)
|
||||||
x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w
|
x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w
|
||||||
@ -664,13 +666,13 @@ struct AutoEncoderKL : public VAE {
|
|||||||
AutoEncoderKLModel ae;
|
AutoEncoderKLModel ae;
|
||||||
|
|
||||||
AutoEncoderKL(ggml_backend_t backend,
|
AutoEncoderKL(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
bool decode_only = false,
|
bool decode_only = false,
|
||||||
bool use_video_decoder = false,
|
bool use_video_decoder = false,
|
||||||
SDVersion version = VERSION_SD1)
|
SDVersion version = VERSION_SD1,
|
||||||
: VAE(version, backend, params_backend, prefix), decode_only(decode_only) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: VAE(version, backend, prefix, weight_manager), decode_only(decode_only) {
|
||||||
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
|
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
|
||||||
scale_factor = 0.18215f;
|
scale_factor = 0.18215f;
|
||||||
shift_factor = 0.f;
|
shift_factor = 0.f;
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#ifndef __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
#ifndef __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
||||||
#define __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
#define __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#include "core/ggml_extend.hpp"
|
#include "core/ggml_extend.hpp"
|
||||||
#include "model_loader.h"
|
#include "model_loader.h"
|
||||||
|
#include "model_manager.h"
|
||||||
|
|
||||||
namespace LTXV {
|
namespace LTXV {
|
||||||
|
|
||||||
@ -1001,10 +1002,10 @@ namespace LTXV {
|
|||||||
sd::Tensor<float> bwe_skip_filter_tensor;
|
sd::Tensor<float> bwe_skip_filter_tensor;
|
||||||
|
|
||||||
LTXAudioVAERunner(ggml_backend_t backend,
|
LTXAudioVAERunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string& prefix = "")
|
const std::string& prefix = "",
|
||||||
: GGMLRunner(backend, params_backend),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: GGMLRunner(backend, weight_manager),
|
||||||
weight_prefix(prefix),
|
weight_prefix(prefix),
|
||||||
config(LTXAudioVAEConfig::detect_from_weights(tensor_storage_map)),
|
config(LTXAudioVAEConfig::detect_from_weights(tensor_storage_map)),
|
||||||
model(config) {
|
model(config) {
|
||||||
@ -1019,7 +1020,7 @@ namespace LTXV {
|
|||||||
model.get_param_tensors(tensors, weight_prefix);
|
model.get_param_tensors(tensors, weight_prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_params_buffer_size() {
|
size_t get_params_mem_size() {
|
||||||
return model.get_params_mem_size();
|
return model.get_params_mem_size();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1066,7 +1067,8 @@ namespace LTXV {
|
|||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
|
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file(model_path)) {
|
if (!model_loader.init_from_file(model_path)) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -1074,20 +1076,17 @@ namespace LTXV {
|
|||||||
|
|
||||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||||
auto ltx_audio_vae = std::make_shared<LTXAudioVAERunner>(backend,
|
auto ltx_audio_vae = std::make_shared<LTXAudioVAERunner>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
prefix);
|
prefix,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!ltx_audio_vae->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("LTX audio VAE test",
|
||||||
LOG_ERROR("ltx audio vae buffer allocation failed");
|
*ltx_audio_vae,
|
||||||
return;
|
ModelManager::ResidencyMode::Resident,
|
||||||
}
|
backend,
|
||||||
|
backend) ||
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
!model_manager->validate_registered_tensors()) {
|
||||||
ltx_audio_vae->get_param_tensors(tensors);
|
LOG_ERROR("register ltx audio vae tensors with model manager failed");
|
||||||
|
|
||||||
if (!model_loader.load_tensors(tensors)) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -957,8 +957,8 @@ namespace LTXVAE {
|
|||||||
|
|
||||||
ggml_tensor* scaled_timestep = timestep;
|
ggml_tensor* scaled_timestep = timestep;
|
||||||
if (timestep_conditioning) {
|
if (timestep_conditioning) {
|
||||||
auto multiplier = ggml_ext_backend_tensor_get_f32(params["timestep_scale_multiplier"]);
|
auto multiplier = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["timestep_scale_multiplier"]);
|
||||||
scaled_timestep = ggml_ext_scale(ctx->ggml_ctx, timestep, multiplier);
|
scaled_timestep = ggml_mul(ctx->ggml_ctx, timestep, multiplier);
|
||||||
}
|
}
|
||||||
|
|
||||||
x = conv_in->forward(ctx, x, causal_decoder);
|
x = conv_in->forward(ctx, x, causal_decoder);
|
||||||
@ -1008,8 +1008,8 @@ namespace LTXVAE {
|
|||||||
|
|
||||||
ggml_tensor* scaled_timestep = timestep;
|
ggml_tensor* scaled_timestep = timestep;
|
||||||
if (timestep_conditioning && timestep != nullptr) {
|
if (timestep_conditioning && timestep != nullptr) {
|
||||||
auto multiplier = ggml_ext_backend_tensor_get_f32(params["timestep_scale_multiplier"]);
|
auto multiplier = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["timestep_scale_multiplier"]);
|
||||||
scaled_timestep = ggml_ext_scale(ctx->ggml_ctx, timestep, multiplier);
|
scaled_timestep = ggml_mul(ctx->ggml_ctx, timestep, multiplier);
|
||||||
}
|
}
|
||||||
|
|
||||||
// conv_in with feat_map for left temporal context
|
// conv_in with feat_map for left temporal context
|
||||||
@ -1223,11 +1223,11 @@ struct LTXVideoVAE : public VAE {
|
|||||||
LTXVAE::VideoVAE vae;
|
LTXVAE::VideoVAE vae;
|
||||||
|
|
||||||
LTXVideoVAE(ggml_backend_t backend,
|
LTXVideoVAE(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string& prefix,
|
const std::string& prefix,
|
||||||
bool decode_only = true,
|
bool decode_only = true,
|
||||||
SDVersion version = VERSION_LTXAV)
|
SDVersion version = VERSION_LTXAV,
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
: decode_only(decode_only),
|
: decode_only(decode_only),
|
||||||
ltx_vae_version(LTXVAE::detect_ltx_vae_version(tensor_storage_map, prefix)),
|
ltx_vae_version(LTXVAE::detect_ltx_vae_version(tensor_storage_map, prefix)),
|
||||||
timestep_conditioning(LTXVAE::detect_ltx_vae_timestep_conditioning(tensor_storage_map, prefix)),
|
timestep_conditioning(LTXVAE::detect_ltx_vae_timestep_conditioning(tensor_storage_map, prefix)),
|
||||||
@ -1239,7 +1239,7 @@ struct LTXVideoVAE : public VAE {
|
|||||||
patch_size,
|
patch_size,
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
prefix),
|
prefix),
|
||||||
VAE(version, backend, params_backend, prefix) {
|
VAE(version, backend, prefix, weight_manager) {
|
||||||
vae.init(params_ctx, tensor_storage_map, prefix);
|
vae.init(params_ctx, tensor_storage_map, prefix);
|
||||||
decode_timestep_tensor.values()[0] = vae.decode_timestep;
|
decode_timestep_tensor.values()[0] = vae.decode_timestep;
|
||||||
}
|
}
|
||||||
@ -1521,7 +1521,8 @@ struct LTXVideoVAE : public VAE {
|
|||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
|
LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
|
||||||
|
|
||||||
ModelLoader model_loader;
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
if (!model_loader.init_from_file_and_convert_name(model_path, "vae.")) {
|
if (!model_loader.init_from_file_and_convert_name(model_path, "vae.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||||
return;
|
return;
|
||||||
@ -1529,22 +1530,19 @@ struct LTXVideoVAE : public VAE {
|
|||||||
|
|
||||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||||
std::shared_ptr<LTXVideoVAE> vae = std::make_shared<LTXVideoVAE>(backend,
|
std::shared_ptr<LTXVideoVAE> vae = std::make_shared<LTXVideoVAE>(backend,
|
||||||
backend,
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"first_stage_model",
|
"first_stage_model",
|
||||||
true,
|
true,
|
||||||
VERSION_LTXAV);
|
VERSION_LTXAV,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
if (!vae->alloc_params_buffer()) {
|
if (!model_manager->register_runner_params("LTX VAE test",
|
||||||
LOG_ERROR("vae buffer allocation failed");
|
*vae,
|
||||||
return;
|
ModelManager::ResidencyMode::Resident,
|
||||||
}
|
backend,
|
||||||
|
backend) ||
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
!model_manager->validate_registered_tensors()) {
|
||||||
vae->get_param_tensors(tensors);
|
LOG_ERROR("register ltx vae tensors with model manager failed");
|
||||||
|
|
||||||
if (!model_loader.load_tensors(tensors)) {
|
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -623,12 +623,12 @@ struct TinyImageAutoEncoder : public VAE {
|
|||||||
bool decode_only = false;
|
bool decode_only = false;
|
||||||
|
|
||||||
TinyImageAutoEncoder(ggml_backend_t backend,
|
TinyImageAutoEncoder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
bool decoder_only = true,
|
bool decoder_only = true,
|
||||||
SDVersion version = VERSION_SD1)
|
SDVersion version = VERSION_SD1,
|
||||||
: VAE(version, backend, params_backend, "tae"),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: VAE(version, backend, "tae", weight_manager),
|
||||||
decode_only(decoder_only),
|
decode_only(decoder_only),
|
||||||
taesd(decoder_only, version) {
|
taesd(decoder_only, version) {
|
||||||
scale_input = false;
|
scale_input = false;
|
||||||
@ -686,12 +686,12 @@ struct TinyVideoAutoEncoder : public VAE {
|
|||||||
bool is_wide = false;
|
bool is_wide = false;
|
||||||
|
|
||||||
TinyVideoAutoEncoder(ggml_backend_t backend,
|
TinyVideoAutoEncoder(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
bool decoder_only = true,
|
bool decoder_only = true,
|
||||||
SDVersion version = VERSION_WAN2)
|
SDVersion version = VERSION_WAN2,
|
||||||
: VAE(version, backend, params_backend, "tae"),
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: VAE(version, backend, "tae", weight_manager),
|
||||||
decode_only(decoder_only) {
|
decode_only(decoder_only) {
|
||||||
for (auto tensor_storage : tensor_storage_map) {
|
for (auto tensor_storage : tensor_storage_map) {
|
||||||
if (tensor_storage.first.find(prefix + ".3.conv.6.weight") != std::string::npos) {
|
if (tensor_storage.first.find(prefix + ".3.conv.6.weight") != std::string::npos) {
|
||||||
|
|||||||
@ -1,8 +1,9 @@
|
|||||||
#ifndef __SD_MODEL_VAE_VAE_HPP__
|
#ifndef __SD_MODEL_VAE_VAE_HPP__
|
||||||
#define __SD_MODEL_VAE_VAE_HPP__
|
#define __SD_MODEL_VAE_VAE_HPP__
|
||||||
|
|
||||||
#include "core/tensor_ggml.hpp"
|
#include "core/tensor_ggml.hpp"
|
||||||
#include "model/common/block.hpp"
|
#include "model/common/block.hpp"
|
||||||
|
#include "model_manager.h"
|
||||||
|
|
||||||
struct VAE : public GGMLRunner {
|
struct VAE : public GGMLRunner {
|
||||||
protected:
|
protected:
|
||||||
@ -63,8 +64,11 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
VAE(SDVersion version, ggml_backend_t backend, ggml_backend_t params_backend, const std::string& weight_prefix = "")
|
VAE(SDVersion version,
|
||||||
: version(version), weight_prefix(weight_prefix), GGMLRunner(backend, params_backend) {}
|
ggml_backend_t backend,
|
||||||
|
const std::string& weight_prefix = "",
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: version(version), weight_prefix(weight_prefix), GGMLRunner(backend, weight_manager) {}
|
||||||
|
|
||||||
int get_scale_factor() {
|
int get_scale_factor() {
|
||||||
int scale_factor = 8;
|
int scale_factor = 8;
|
||||||
@ -224,8 +228,10 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct FakeVAE : public VAE {
|
struct FakeVAE : public VAE {
|
||||||
FakeVAE(SDVersion version, ggml_backend_t backend, ggml_backend_t params_backend)
|
FakeVAE(SDVersion version,
|
||||||
: VAE(version, backend, params_backend) {}
|
ggml_backend_t backend,
|
||||||
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: VAE(version, backend, "", weight_manager) {}
|
||||||
|
|
||||||
int get_encoder_output_channels(int input_channels) {
|
int get_encoder_output_channels(int input_channels) {
|
||||||
return input_channels;
|
return input_channels;
|
||||||
|
|||||||
@ -1124,12 +1124,12 @@ namespace WAN {
|
|||||||
WanVAE ae;
|
WanVAE ae;
|
||||||
|
|
||||||
WanVAERunner(ggml_backend_t backend,
|
WanVAERunner(ggml_backend_t backend,
|
||||||
ggml_backend_t params_backend,
|
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
bool decode_only = false,
|
bool decode_only = false,
|
||||||
SDVersion version = VERSION_WAN2)
|
SDVersion version = VERSION_WAN2,
|
||||||
: VAE(version, backend, params_backend, prefix), decode_only(decode_only), ae(decode_only, version == VERSION_WAN2_2_TI2V) {
|
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||||
|
: VAE(version, backend, prefix, weight_manager), decode_only(decode_only), ae(decode_only, version == VERSION_WAN2_2_TI2V) {
|
||||||
ae.init(params_ctx, tensor_storage_map, prefix);
|
ae.init(params_ctx, tensor_storage_map, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1327,27 +1327,24 @@ namespace WAN {
|
|||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = sd_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, backend, String2TensorStorage{}, "first_stage_model", false, VERSION_WAN2_2_TI2V);
|
auto model_manager = std::make_shared<ModelManager>();
|
||||||
|
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, String2TensorStorage{}, "first_stage_model", false, VERSION_WAN2_2_TI2V, model_manager);
|
||||||
{
|
{
|
||||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||||
|
|
||||||
if (!vae->alloc_params_buffer()) {
|
ModelLoader& model_loader = model_manager->loader();
|
||||||
LOG_ERROR("vae buffer allocation failed");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
|
||||||
vae->get_param_tensors(tensors);
|
|
||||||
|
|
||||||
ModelLoader model_loader;
|
|
||||||
if (!model_loader.init_from_file_and_convert_name(file_path, "vae.")) {
|
if (!model_loader.init_from_file_and_convert_name(file_path, "vae.")) {
|
||||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool success = model_loader.load_tensors(tensors);
|
if (!model_manager->register_runner_params("Wan VAE test",
|
||||||
|
*vae,
|
||||||
if (!success) {
|
ModelManager::ResidencyMode::Resident,
|
||||||
LOG_ERROR("load tensors from model loader failed");
|
backend,
|
||||||
|
backend) ||
|
||||||
|
!model_manager->validate_registered_tensors()) {
|
||||||
|
LOG_ERROR("register wan vae tensors with model manager failed");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -121,6 +121,42 @@ public:
|
|||||||
ggml_backend_t compute_backend,
|
ggml_backend_t compute_backend,
|
||||||
ggml_backend_t params_backend,
|
ggml_backend_t params_backend,
|
||||||
size_t* registered_tensor_size = nullptr);
|
size_t* registered_tensor_size = nullptr);
|
||||||
|
|
||||||
|
template <typename Runner>
|
||||||
|
bool register_runner_params(const std::string& desc,
|
||||||
|
Runner& runner,
|
||||||
|
ResidencyMode residency_mode,
|
||||||
|
ggml_backend_t compute_backend,
|
||||||
|
ggml_backend_t params_backend,
|
||||||
|
size_t* registered_tensor_size = nullptr) {
|
||||||
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
|
runner.get_param_tensors(tensors);
|
||||||
|
return register_param_tensors(desc,
|
||||||
|
std::move(tensors),
|
||||||
|
residency_mode,
|
||||||
|
compute_backend,
|
||||||
|
params_backend,
|
||||||
|
registered_tensor_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Runner>
|
||||||
|
bool register_runner_params(const std::string& desc,
|
||||||
|
Runner& runner,
|
||||||
|
const std::string& prefix,
|
||||||
|
ResidencyMode residency_mode,
|
||||||
|
ggml_backend_t compute_backend,
|
||||||
|
ggml_backend_t params_backend,
|
||||||
|
size_t* registered_tensor_size = nullptr) {
|
||||||
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
|
runner.get_param_tensors(tensors, prefix);
|
||||||
|
return register_param_tensors(desc,
|
||||||
|
std::move(tensors),
|
||||||
|
residency_mode,
|
||||||
|
compute_backend,
|
||||||
|
params_backend,
|
||||||
|
registered_tensor_size);
|
||||||
|
}
|
||||||
|
|
||||||
bool validate_registered_tensors();
|
bool validate_registered_tensors();
|
||||||
|
|
||||||
bool prepare_params(const std::vector<ggml_tensor*>& tensors) override;
|
bool prepare_params(const std::vector<ggml_tensor*>& tensors) override;
|
||||||
|
|||||||
@ -241,7 +241,6 @@ public:
|
|||||||
}
|
}
|
||||||
std::map<std::string, ggml_tensor*> group_tensors;
|
std::map<std::string, ggml_tensor*> group_tensors;
|
||||||
model->get_param_tensors(group_tensors);
|
model->get_param_tensors(group_tensors);
|
||||||
model->set_weight_manager(model_manager);
|
|
||||||
if (model_manager == nullptr) {
|
if (model_manager == nullptr) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -586,33 +585,35 @@ public:
|
|||||||
|
|
||||||
if (sd_version_is_sd3(version)) {
|
if (sd_version_is_sd3(version)) {
|
||||||
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map);
|
|
||||||
diffusion_model = std::make_shared<MMDiTRunner>(backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model");
|
model_manager);
|
||||||
|
diffusion_model = std::make_shared<MMDiTRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
|
tensor_storage_map,
|
||||||
|
"model.diffusion_model",
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_pid(version)) {
|
} else if (sd_version_is_pid(version)) {
|
||||||
vae_decode_only = false;
|
vae_decode_only = false;
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
|
||||||
version);
|
|
||||||
diffusion_model = std::make_shared<Pid::PiDRunner>(backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
|
||||||
"model.diffusion_model.net");
|
|
||||||
} else if (sd_version_is_ideogram4(version)) {
|
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version,
|
version,
|
||||||
"",
|
"",
|
||||||
false);
|
false,
|
||||||
diffusion_model = std::make_shared<Ideogram4::Ideogram4Runner>(backend_for(SDBackendModule::DIFFUSION),
|
model_manager);
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Pid::PiDRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model");
|
"model.diffusion_model.net",
|
||||||
|
model_manager);
|
||||||
|
} else if (sd_version_is_ideogram4(version)) {
|
||||||
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
|
tensor_storage_map,
|
||||||
|
version,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
|
diffusion_model = std::make_shared<Ideogram4::Ideogram4Runner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
|
tensor_storage_map,
|
||||||
|
"model.diffusion_model",
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_flux(version)) {
|
} else if (sd_version_is_flux(version)) {
|
||||||
bool is_chroma = false;
|
bool is_chroma = false;
|
||||||
for (auto pair : tensor_storage_map) {
|
for (auto pair : tensor_storage_map) {
|
||||||
@ -623,66 +624,71 @@ public:
|
|||||||
}
|
}
|
||||||
if (is_chroma) {
|
if (is_chroma) {
|
||||||
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
sd_ctx_params->chroma_use_t5_mask,
|
sd_ctx_params->chroma_use_t5_mask,
|
||||||
sd_ctx_params->chroma_t5_mask_pad);
|
sd_ctx_params->chroma_t5_mask_pad,
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
} else if (version == VERSION_OVIS_IMAGE) {
|
} else if (version == VERSION_OVIS_IMAGE) {
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version,
|
version,
|
||||||
"",
|
"",
|
||||||
false);
|
false,
|
||||||
|
model_manager);
|
||||||
} else {
|
} else {
|
||||||
cond_stage_model = std::make_shared<FluxCLIPEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<FluxCLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
tensor_storage_map,
|
||||||
tensor_storage_map);
|
model_manager);
|
||||||
}
|
}
|
||||||
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version,
|
version,
|
||||||
sd_ctx_params->chroma_use_dit_mask);
|
sd_ctx_params->chroma_use_dit_mask,
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_flux2(version)) {
|
} else if (sd_version_is_flux2(version)) {
|
||||||
bool is_chroma = false;
|
bool is_chroma = false;
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version);
|
version,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version,
|
version,
|
||||||
sd_ctx_params->chroma_use_dit_mask);
|
sd_ctx_params->chroma_use_dit_mask,
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_ltxav(version)) {
|
} else if (sd_version_is_ltxav(version)) {
|
||||||
cond_stage_model = std::make_shared<LTXAVEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LTXAVEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map);
|
|
||||||
diffusion_model = std::make_shared<LTXV::LTXAVRunner>(backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model");
|
"text_encoders.llm",
|
||||||
|
"text_embedding_projection",
|
||||||
|
model_manager);
|
||||||
|
diffusion_model = std::make_shared<LTXV::LTXAVRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
|
tensor_storage_map,
|
||||||
|
"model.diffusion_model",
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_wan(version)) {
|
} else if (sd_version_is_wan(version)) {
|
||||||
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
true,
|
true,
|
||||||
0,
|
0,
|
||||||
true);
|
true,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
if (strlen(SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path)) > 0) {
|
if (strlen(SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path)) > 0) {
|
||||||
high_noise_diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
|
high_noise_diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.high_noise_diffusion_model",
|
"model.high_noise_diffusion_model",
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
}
|
}
|
||||||
if (diffusion_model->get_desc() == "Wan2.1-I2V-14B" ||
|
if (diffusion_model->get_desc() == "Wan2.1-I2V-14B" ||
|
||||||
diffusion_model->get_desc() == "Wan2.1-FLF2V-14B" ||
|
diffusion_model->get_desc() == "Wan2.1-FLF2V-14B" ||
|
||||||
@ -691,8 +697,8 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend_for(SDBackendModule::CLIP_VISION),
|
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend_for(SDBackendModule::CLIP_VISION),
|
||||||
params_backend_for(SDBackendModule::CLIP_VISION),
|
tensor_storage_map,
|
||||||
tensor_storage_map);
|
model_manager);
|
||||||
clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
||||||
if (!register_runner_params("CLIP vision",
|
if (!register_runner_params("CLIP vision",
|
||||||
clip_vision,
|
clip_vision,
|
||||||
@ -706,93 +712,99 @@ public:
|
|||||||
enable_vision = true;
|
enable_vision = true;
|
||||||
}
|
}
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version,
|
version,
|
||||||
"",
|
"",
|
||||||
enable_vision);
|
enable_vision,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<Qwen::QwenImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Qwen::QwenImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version,
|
version,
|
||||||
sd_ctx_params->qwen_image_zero_cond_t);
|
sd_ctx_params->qwen_image_zero_cond_t,
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_longcat(version)) {
|
} else if (sd_version_is_longcat(version)) {
|
||||||
bool enable_vision = false;
|
bool enable_vision = false;
|
||||||
if (!vae_decode_only) {
|
if (!vae_decode_only) {
|
||||||
enable_vision = true;
|
enable_vision = true;
|
||||||
}
|
}
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version,
|
version,
|
||||||
"",
|
"",
|
||||||
enable_vision);
|
enable_vision,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version,
|
version,
|
||||||
sd_ctx_params->chroma_use_dit_mask);
|
sd_ctx_params->chroma_use_dit_mask,
|
||||||
|
model_manager);
|
||||||
} else if (version == VERSION_HIDREAM_O1) {
|
} else if (version == VERSION_HIDREAM_O1) {
|
||||||
cond_stage_model = std::make_shared<HiDreamO1::HiDreamO1Conditioner>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<HiDreamO1::HiDreamO1Conditioner>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map);
|
|
||||||
diffusion_model = std::make_shared<HiDreamO1::HiDreamO1Runner>(backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model");
|
model_manager);
|
||||||
|
diffusion_model = std::make_shared<HiDreamO1::HiDreamO1Runner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
|
tensor_storage_map,
|
||||||
|
"model",
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_anima(version)) {
|
} else if (sd_version_is_anima(version)) {
|
||||||
cond_stage_model = std::make_shared<AnimaConditioner>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<AnimaConditioner>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
tensor_storage_map,
|
||||||
tensor_storage_map);
|
model_manager);
|
||||||
diffusion_model = std::make_shared<Anima::AnimaRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Anima::AnimaRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
|
||||||
"model.diffusion_model");
|
|
||||||
} else if (sd_version_is_z_image(version)) {
|
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
|
||||||
version);
|
|
||||||
diffusion_model = std::make_shared<ZImage::ZImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version);
|
model_manager);
|
||||||
|
} else if (sd_version_is_z_image(version)) {
|
||||||
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
|
tensor_storage_map,
|
||||||
|
version,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
|
diffusion_model = std::make_shared<ZImage::ZImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
|
tensor_storage_map,
|
||||||
|
"model.diffusion_model",
|
||||||
|
version,
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_ernie_image(version)) {
|
} else if (sd_version_is_ernie_image(version)) {
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version);
|
version,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<ErnieImage::ErnieImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<ErnieImage::ErnieImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model");
|
"model.diffusion_model",
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_lens(version)) {
|
} else if (sd_version_is_lens(version)) {
|
||||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
version);
|
version,
|
||||||
|
"",
|
||||||
|
false,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<Lens::LensRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<Lens::LensRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model");
|
"model.diffusion_model",
|
||||||
|
model_manager);
|
||||||
} else { // SD1.x SD2.x SDXL
|
} else { // SD1.x SD2.x SDXL
|
||||||
std::map<std::string, std::string> embbeding_map;
|
std::map<std::string, std::string> embbeding_map;
|
||||||
for (uint32_t i = 0; i < sd_ctx_params->embedding_count; i++) {
|
for (uint32_t i = 0; i < sd_ctx_params->embedding_count; i++) {
|
||||||
embbeding_map.emplace(SAFE_STR(sd_ctx_params->embeddings[i].name), SAFE_STR(sd_ctx_params->embeddings[i].path));
|
embbeding_map.emplace(SAFE_STR(sd_ctx_params->embeddings[i].name), SAFE_STR(sd_ctx_params->embeddings[i].path));
|
||||||
}
|
}
|
||||||
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend_for(SDBackendModule::TE),
|
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend_for(SDBackendModule::TE),
|
||||||
params_backend_for(SDBackendModule::TE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
embbeding_map,
|
embbeding_map,
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
diffusion_model = std::make_shared<UNetModelRunner>(backend_for(SDBackendModule::DIFFUSION),
|
diffusion_model = std::make_shared<UNetModelRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||||
params_backend_for(SDBackendModule::DIFFUSION),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"model.diffusion_model",
|
"model.diffusion_model",
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
if (sd_ctx_params->diffusion_conv_direct) {
|
if (sd_ctx_params->diffusion_conv_direct) {
|
||||||
LOG_INFO("Using Conv2d direct in the diffusion model");
|
LOG_INFO("Using Conv2d direct in the diffusion model");
|
||||||
diffusion_model->set_conv2d_direct_enabled(true);
|
diffusion_model->set_conv2d_direct_enabled(true);
|
||||||
@ -841,19 +853,19 @@ public:
|
|||||||
sd_version_is_anima(version) ||
|
sd_version_is_anima(version) ||
|
||||||
sd_version_is_ltxav(version)) {
|
sd_version_is_ltxav(version)) {
|
||||||
return std::make_shared<TinyVideoAutoEncoder>(backend_for(SDBackendModule::VAE),
|
return std::make_shared<TinyVideoAutoEncoder>(backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"decoder",
|
"decoder",
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
auto model = std::make_shared<TinyImageAutoEncoder>(backend_for(SDBackendModule::VAE),
|
auto model = std::make_shared<TinyImageAutoEncoder>(backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"decoder.layers",
|
"decoder.layers",
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
return model;
|
return model;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -871,28 +883,28 @@ public:
|
|||||||
auto create_vae = [&]() -> std::shared_ptr<VAE> {
|
auto create_vae = [&]() -> std::shared_ptr<VAE> {
|
||||||
if (sd_version_is_ltxav(version)) {
|
if (sd_version_is_ltxav(version)) {
|
||||||
return std::make_shared<LTXVideoVAE>(backend_for(SDBackendModule::VAE),
|
return std::make_shared<LTXVideoVAE>(backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"first_stage_model",
|
"first_stage_model",
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
} else if (sd_version_is_wan(version) ||
|
} else if (sd_version_is_wan(version) ||
|
||||||
sd_version_is_qwen_image(version) ||
|
sd_version_is_qwen_image(version) ||
|
||||||
sd_version_is_anima(version)) {
|
sd_version_is_anima(version)) {
|
||||||
return std::make_shared<WAN::WanVAERunner>(backend_for(SDBackendModule::VAE),
|
return std::make_shared<WAN::WanVAERunner>(backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"first_stage_model",
|
"first_stage_model",
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
version);
|
version,
|
||||||
|
model_manager);
|
||||||
} else {
|
} else {
|
||||||
auto model = std::make_shared<AutoEncoderKL>(backend_for(SDBackendModule::VAE),
|
auto model = std::make_shared<AutoEncoderKL>(backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE),
|
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
"first_stage_model",
|
"first_stage_model",
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
false,
|
false,
|
||||||
vae_version);
|
vae_version,
|
||||||
|
model_manager);
|
||||||
if (sd_version_is_sdxl(version) &&
|
if (sd_version_is_sdxl(version) &&
|
||||||
(strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale || external_vae_is_invalid)) {
|
(strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale || external_vae_is_invalid)) {
|
||||||
float vae_conv_2d_scale = 1.f / 32.f;
|
float vae_conv_2d_scale = 1.f / 32.f;
|
||||||
@ -910,7 +922,7 @@ public:
|
|||||||
LOG_INFO("using FakeVAE");
|
LOG_INFO("using FakeVAE");
|
||||||
first_stage_model = std::make_shared<FakeVAE>(version,
|
first_stage_model = std::make_shared<FakeVAE>(version,
|
||||||
backend_for(SDBackendModule::VAE),
|
backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE));
|
model_manager);
|
||||||
if (!register_runner_params("VAE",
|
if (!register_runner_params("VAE",
|
||||||
first_stage_model,
|
first_stage_model,
|
||||||
SDBackendModule::VAE,
|
SDBackendModule::VAE,
|
||||||
@ -952,8 +964,9 @@ public:
|
|||||||
|
|
||||||
if (use_audio_vae) {
|
if (use_audio_vae) {
|
||||||
audio_vae_model = std::make_shared<LTXV::LTXAudioVAERunner>(backend_for(SDBackendModule::VAE),
|
audio_vae_model = std::make_shared<LTXV::LTXAudioVAERunner>(backend_for(SDBackendModule::VAE),
|
||||||
params_backend_for(SDBackendModule::VAE),
|
tensor_storage_map,
|
||||||
tensor_storage_map);
|
"",
|
||||||
|
model_manager);
|
||||||
if (!register_runner_params("LTX audio VAE",
|
if (!register_runner_params("LTX audio VAE",
|
||||||
audio_vae_model,
|
audio_vae_model,
|
||||||
SDBackendModule::VAE,
|
SDBackendModule::VAE,
|
||||||
@ -977,7 +990,9 @@ public:
|
|||||||
control_net = std::make_shared<ControlNet>(backend_for(SDBackendModule::CONTROL_NET),
|
control_net = std::make_shared<ControlNet>(backend_for(SDBackendModule::CONTROL_NET),
|
||||||
params_backend_for(SDBackendModule::CONTROL_NET),
|
params_backend_for(SDBackendModule::CONTROL_NET),
|
||||||
model_loader.get_tensor_storage_map(),
|
model_loader.get_tensor_storage_map(),
|
||||||
version);
|
version,
|
||||||
|
"",
|
||||||
|
model_manager);
|
||||||
if (sd_ctx_params->diffusion_conv_direct) {
|
if (sd_ctx_params->diffusion_conv_direct) {
|
||||||
LOG_INFO("Using Conv2d direct in the control net");
|
LOG_INFO("Using Conv2d direct in the control net");
|
||||||
control_net->set_conv2d_direct_enabled(true);
|
control_net->set_conv2d_direct_enabled(true);
|
||||||
@ -998,6 +1013,7 @@ public:
|
|||||||
version,
|
version,
|
||||||
tensor_storage_map,
|
tensor_storage_map,
|
||||||
model_loader,
|
model_loader,
|
||||||
|
model_manager,
|
||||||
n_threads,
|
n_threads,
|
||||||
[this](SDBackendModule module) { return ensure_backend_pair(module); },
|
[this](SDBackendModule module) { return ensure_backend_pair(module); },
|
||||||
[this](SDBackendModule module) { return backend_for(module); },
|
[this](SDBackendModule module) { return backend_for(module); },
|
||||||
@ -5008,8 +5024,8 @@ static sd::Tensor<float> upscale_ltx_spatial_video_latent(sd_ctx_t* sd_ctx,
|
|||||||
|
|
||||||
std::unique_ptr<LTXVUpsampler::LatentUpsamplerRunner> upsampler =
|
std::unique_ptr<LTXVUpsampler::LatentUpsamplerRunner> upsampler =
|
||||||
std::make_unique<LTXVUpsampler::LatentUpsamplerRunner>(sd_ctx->sd->backend_for(SDBackendModule::UPSCALER),
|
std::make_unique<LTXVUpsampler::LatentUpsamplerRunner>(sd_ctx->sd->backend_for(SDBackendModule::UPSCALER),
|
||||||
sd_ctx->sd->params_backend_for(SDBackendModule::UPSCALER),
|
model_loader.get_tensor_storage_map(),
|
||||||
model_loader.get_tensor_storage_map());
|
upsampler_manager);
|
||||||
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
|
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
|
||||||
upsampler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
upsampler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
||||||
if (upsampler->model == nullptr) {
|
if (upsampler->model == nullptr) {
|
||||||
@ -5019,7 +5035,6 @@ static sd::Tensor<float> upscale_ltx_spatial_video_latent(sd_ctx_t* sd_ctx,
|
|||||||
|
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
upsampler->get_param_tensors(tensors);
|
upsampler->get_param_tensors(tensors);
|
||||||
upsampler->set_weight_manager(upsampler_manager);
|
|
||||||
if (!upsampler_manager->register_param_tensors("LTX latent upsampler",
|
if (!upsampler_manager->register_param_tensors("LTX latent upsampler",
|
||||||
std::move(tensors),
|
std::move(tensors),
|
||||||
ModelManager::ResidencyMode::Resident,
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
|||||||
@ -90,8 +90,8 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
|||||||
model_loader.set_wtype_override(model_data_type);
|
model_loader.set_wtype_override(model_data_type);
|
||||||
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
||||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend_for(SDBackendModule::UPSCALER),
|
esrgan_upscaler = std::make_shared<ESRGAN>(backend_for(SDBackendModule::UPSCALER),
|
||||||
params_backend_for(SDBackendModule::UPSCALER),
|
model_loader.get_tensor_storage_map(),
|
||||||
model_loader.get_tensor_storage_map());
|
model_manager);
|
||||||
if (esrgan_upscaler == nullptr || esrgan_upscaler->rrdb_net == nullptr) {
|
if (esrgan_upscaler == nullptr || esrgan_upscaler->rrdb_net == nullptr) {
|
||||||
LOG_ERROR("init esrgan model from metadata failed: '%s'", esrgan_path.c_str());
|
LOG_ERROR("init esrgan model from metadata failed: '%s'", esrgan_path.c_str());
|
||||||
return false;
|
return false;
|
||||||
@ -104,7 +104,6 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
|||||||
|
|
||||||
std::map<std::string, ggml_tensor*> tensors;
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
esrgan_upscaler->get_param_tensors(tensors);
|
esrgan_upscaler->get_param_tensors(tensors);
|
||||||
esrgan_upscaler->set_weight_manager(model_manager);
|
|
||||||
if (!model_manager->register_param_tensors("ESRGAN",
|
if (!model_manager->register_param_tensors("ESRGAN",
|
||||||
std::move(tensors),
|
std::move(tensors),
|
||||||
ModelManager::ResidencyMode::Resident,
|
ModelManager::ResidencyMode::Resident,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user