mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-17 03:37:20 +00:00
refactor: route all runner params through model manager (#1649)
This commit is contained in:
parent
9b0fceb41b
commit
8d4c7af95b
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_CONDITIONING_CONDITIONER_HPP__
|
||||
#ifndef __SD_CONDITIONING_CONDITIONER_HPP__
|
||||
#define __SD_CONDITIONING_CONDITIONER_HPP__
|
||||
|
||||
#include <cmath>
|
||||
@ -118,7 +118,6 @@ public:
|
||||
virtual void set_stream_layers_enabled(bool enabled) {}
|
||||
virtual void set_flash_attention_enabled(bool enabled) = 0;
|
||||
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) {}
|
||||
virtual void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) {}
|
||||
virtual void runner_done() {}
|
||||
};
|
||||
|
||||
@ -137,10 +136,10 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
||||
std::map<std::string, std::pair<int, int>> embedding_pos_map;
|
||||
|
||||
FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::map<std::string, std::string>& orig_embedding_map,
|
||||
SDVersion version = VERSION_SD1)
|
||||
SDVersion version = VERSION_SD1,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: version(version), tokenizer(sd_version_is_sd2(version) ? 0 : 49407) {
|
||||
for (const auto& kv : orig_embedding_map) {
|
||||
std::string name = kv.first;
|
||||
@ -150,12 +149,12 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
||||
}
|
||||
bool force_clip_f32 = !embedding_map.empty();
|
||||
if (sd_version_is_sd1(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32);
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, force_clip_f32, weight_manager);
|
||||
} else if (sd_version_is_sd2(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32);
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, true, force_clip_f32, weight_manager);
|
||||
} else if (sd_version_is_sdxl(version)) {
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32);
|
||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32);
|
||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, force_clip_f32, weight_manager);
|
||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, force_clip_f32, weight_manager);
|
||||
}
|
||||
}
|
||||
|
||||
@ -194,13 +193,6 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
||||
}
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
text_model->set_weight_manager(manager);
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
text_model2->set_weight_manager(manager);
|
||||
}
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
text_model->runner_done();
|
||||
if (sd_version_is_sdxl(version)) {
|
||||
@ -522,9 +514,9 @@ struct FrozenCLIPVisionEmbedder : public GGMLRunner {
|
||||
std::string weight_prefix = "cond_stage_model.transformer";
|
||||
|
||||
FrozenCLIPVisionEmbedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {})
|
||||
: GGMLRunner(backend, params_backend) {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager) {
|
||||
bool proj_in = false;
|
||||
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
||||
if (!starts_with(name, weight_prefix)) {
|
||||
@ -580,8 +572,8 @@ struct SD3CLIPEmbedder : public Conditioner {
|
||||
std::shared_ptr<T5Runner> t5;
|
||||
|
||||
SD3CLIPEmbedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {})
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: clip_g_tokenizer(0) {
|
||||
bool use_clip_l = false;
|
||||
bool use_clip_g = false;
|
||||
@ -600,13 +592,13 @@ struct SD3CLIPEmbedder : public Conditioner {
|
||||
return;
|
||||
}
|
||||
if (use_clip_l) {
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false, false, weight_manager);
|
||||
}
|
||||
if (use_clip_g) {
|
||||
clip_g = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
||||
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false, false, weight_manager);
|
||||
}
|
||||
if (use_t5) {
|
||||
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer");
|
||||
t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", false, weight_manager);
|
||||
}
|
||||
}
|
||||
|
||||
@ -670,18 +662,6 @@ struct SD3CLIPEmbedder : public Conditioner {
|
||||
}
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
if (clip_l) {
|
||||
clip_l->set_weight_manager(manager);
|
||||
}
|
||||
if (clip_g) {
|
||||
clip_g->set_weight_manager(manager);
|
||||
}
|
||||
if (t5) {
|
||||
t5->set_weight_manager(manager);
|
||||
}
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
if (clip_l) {
|
||||
clip_l->runner_done();
|
||||
@ -961,8 +941,8 @@ struct FluxCLIPEmbedder : public Conditioner {
|
||||
size_t chunk_len = 256;
|
||||
|
||||
FluxCLIPEmbedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {}) {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
|
||||
bool use_clip_l = false;
|
||||
bool use_t5 = false;
|
||||
for (auto pair : tensor_storage_map) {
|
||||
@ -979,12 +959,12 @@ struct FluxCLIPEmbedder : public Conditioner {
|
||||
}
|
||||
|
||||
if (use_clip_l) {
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, params_backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
|
||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_storage_map, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true, false, weight_manager);
|
||||
} else {
|
||||
LOG_WARN("clip_l text encoder not found! Prompt adherence might be degraded.");
|
||||
}
|
||||
if (use_t5) {
|
||||
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer");
|
||||
t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", false, weight_manager);
|
||||
} else {
|
||||
LOG_WARN("t5xxl text encoder not found! Prompt adherence might be degraded.");
|
||||
}
|
||||
@ -1035,15 +1015,6 @@ struct FluxCLIPEmbedder : public Conditioner {
|
||||
}
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
if (clip_l) {
|
||||
clip_l->set_weight_manager(manager);
|
||||
}
|
||||
if (t5) {
|
||||
t5->set_weight_manager(manager);
|
||||
}
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
if (clip_l) {
|
||||
clip_l->runner_done();
|
||||
@ -1219,11 +1190,11 @@ struct T5CLIPEmbedder : public Conditioner {
|
||||
bool is_umt5 = false;
|
||||
|
||||
T5CLIPEmbedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
bool use_mask = false,
|
||||
int mask_pad = 0,
|
||||
bool is_umt5 = false)
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
bool use_mask = false,
|
||||
int mask_pad = 0,
|
||||
bool is_umt5 = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: use_mask(use_mask), mask_pad(mask_pad), t5_tokenizer(is_umt5) {
|
||||
bool use_t5 = false;
|
||||
for (auto pair : tensor_storage_map) {
|
||||
@ -1236,7 +1207,7 @@ struct T5CLIPEmbedder : public Conditioner {
|
||||
LOG_WARN("IMPORTANT NOTICE: No text encoders provided, cannot process prompts!");
|
||||
return;
|
||||
} else {
|
||||
t5 = std::make_shared<T5Runner>(backend, params_backend, tensor_storage_map, "text_encoders.t5xxl.transformer", is_umt5);
|
||||
t5 = std::make_shared<T5Runner>(backend, tensor_storage_map, "text_encoders.t5xxl.transformer", is_umt5, weight_manager);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1270,12 +1241,6 @@ struct T5CLIPEmbedder : public Conditioner {
|
||||
}
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
if (t5) {
|
||||
t5->set_weight_manager(manager);
|
||||
}
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
if (t5) {
|
||||
t5->runner_done();
|
||||
@ -1422,15 +1387,15 @@ struct AnimaConditioner : public Conditioner {
|
||||
std::shared_ptr<LLM::LLMRunner> llm;
|
||||
|
||||
AnimaConditioner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {}) {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
|
||||
qwen_tokenizer = std::make_shared<Qwen2Tokenizer>();
|
||||
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::QWEN3,
|
||||
backend,
|
||||
params_backend,
|
||||
tensor_storage_map,
|
||||
"text_encoders.llm",
|
||||
false);
|
||||
false,
|
||||
weight_manager);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||
@ -1453,10 +1418,6 @@ struct AnimaConditioner : public Conditioner {
|
||||
llm->set_weight_adapter(adapter);
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
llm->set_weight_manager(manager);
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
llm->runner_done();
|
||||
}
|
||||
@ -1545,11 +1506,11 @@ struct LLMEmbedder : public Conditioner {
|
||||
std::shared_ptr<LLM::LLMRunner> llm;
|
||||
|
||||
LLMEmbedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
SDVersion version = VERSION_QWEN_IMAGE,
|
||||
const std::string prefix = "",
|
||||
bool enable_vision = false)
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
SDVersion version = VERSION_QWEN_IMAGE,
|
||||
const std::string prefix = "",
|
||||
bool enable_vision = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: version(version) {
|
||||
LLM::LLMArch arch = LLM::LLMArch::QWEN2_5_VL;
|
||||
if (version == VERSION_FLUX2) {
|
||||
@ -1576,10 +1537,10 @@ struct LLMEmbedder : public Conditioner {
|
||||
}
|
||||
llm = std::make_shared<LLM::LLMRunner>(arch,
|
||||
backend,
|
||||
params_backend,
|
||||
tensor_storage_map,
|
||||
"text_encoders.llm",
|
||||
enable_vision);
|
||||
enable_vision,
|
||||
weight_manager);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||
@ -1604,12 +1565,6 @@ struct LLMEmbedder : public Conditioner {
|
||||
}
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
if (llm) {
|
||||
llm->set_weight_manager(manager);
|
||||
}
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
if (llm) {
|
||||
llm->runner_done();
|
||||
@ -2106,10 +2061,10 @@ struct LTXAVTextProjectionRunner : public GGMLRunner {
|
||||
LTXAVTextProjection model;
|
||||
|
||||
LTXAVTextProjectionRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "")
|
||||
: GGMLRunner(backend, params_backend),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
model(tensor_storage_map.find(prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end()) {
|
||||
model.init(params_ctx, tensor_storage_map, prefix);
|
||||
}
|
||||
@ -2154,22 +2109,22 @@ struct LTXAVEmbedder : public Conditioner {
|
||||
bool dual_projection = false;
|
||||
|
||||
LTXAVEmbedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& llm_prefix = "text_encoders.llm",
|
||||
const std::string& projector_prefix = "text_embedding_projection") {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& llm_prefix = "text_encoders.llm",
|
||||
const std::string& projector_prefix = "text_embedding_projection",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr) {
|
||||
tokenizer = std::make_shared<GemmaTokenizer>();
|
||||
llm = std::make_shared<LLM::LLMRunner>(LLM::LLMArch::GEMMA3_12B,
|
||||
backend,
|
||||
params_backend,
|
||||
tensor_storage_map,
|
||||
llm_prefix,
|
||||
false);
|
||||
false,
|
||||
weight_manager);
|
||||
dual_projection = tensor_storage_map.find(projector_prefix + ".video_aggregate_embed.weight") != tensor_storage_map.end();
|
||||
projector = std::make_shared<LTXAVTextProjectionRunner>(backend,
|
||||
params_backend,
|
||||
tensor_storage_map,
|
||||
projector_prefix);
|
||||
projector_prefix,
|
||||
weight_manager);
|
||||
}
|
||||
|
||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||
@ -2192,11 +2147,6 @@ struct LTXAVEmbedder : public Conditioner {
|
||||
projector->set_weight_adapter(adapter);
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
llm->set_weight_manager(manager);
|
||||
projector->set_weight_manager(manager);
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
llm->runner_done();
|
||||
projector->runner_done();
|
||||
|
||||
@ -1696,11 +1696,9 @@ protected:
|
||||
using GraphCutSegment = sd::ggml_graph_cut::Segment;
|
||||
using GraphCutPlan = sd::ggml_graph_cut::Plan;
|
||||
|
||||
ggml_backend_t params_backend = nullptr;
|
||||
ggml_backend_t runtime_backend = nullptr;
|
||||
|
||||
ggml_context* params_ctx = nullptr;
|
||||
ggml_backend_buffer_t params_buffer = nullptr;
|
||||
ggml_context* params_ctx = nullptr;
|
||||
|
||||
ggml_context* cache_ctx = nullptr;
|
||||
ggml_backend_buffer_t cache_buffer = nullptr;
|
||||
@ -1880,9 +1878,6 @@ protected:
|
||||
auto manager = weight_manager.lock();
|
||||
if (manager == nullptr) {
|
||||
if (!params_to_prepare.empty()) {
|
||||
if (params_buffer != nullptr) {
|
||||
return true;
|
||||
}
|
||||
LOG_ERROR("%s weight manager is not set for graph params", get_desc().c_str());
|
||||
return false;
|
||||
}
|
||||
@ -2194,13 +2189,11 @@ protected:
|
||||
plan.valid &&
|
||||
max_graph_vram_bytes > 0 &&
|
||||
plan.segments.size() > 1 &&
|
||||
params_backend != runtime_backend &&
|
||||
!sd_backend_is_cpu(runtime_backend);
|
||||
}
|
||||
|
||||
bool can_attempt_graph_cut_segmented_compute() const {
|
||||
return max_graph_vram_bytes > 0 &&
|
||||
params_backend != runtime_backend &&
|
||||
!sd_backend_is_cpu(runtime_backend);
|
||||
}
|
||||
|
||||
@ -2631,16 +2624,15 @@ public:
|
||||
public:
|
||||
virtual std::string get_desc() = 0;
|
||||
|
||||
GGMLRunner(ggml_backend_t backend, ggml_backend_t params_backend)
|
||||
: params_backend(params_backend),
|
||||
runtime_backend(backend) {
|
||||
GGMLRunner(ggml_backend_t backend,
|
||||
std::shared_ptr<RunnerWeightManager> manager = nullptr)
|
||||
: runtime_backend(backend),
|
||||
weight_manager(manager) {
|
||||
GGML_ASSERT(runtime_backend != nullptr);
|
||||
GGML_ASSERT(params_backend != nullptr);
|
||||
alloc_params_ctx();
|
||||
}
|
||||
|
||||
virtual ~GGMLRunner() {
|
||||
free_params_buffer();
|
||||
free_compute_buffer();
|
||||
free_params_ctx();
|
||||
free_compute_ctx();
|
||||
@ -2674,73 +2666,6 @@ public:
|
||||
alloc_compute_ctx();
|
||||
}
|
||||
|
||||
bool alloc_params_buffer() {
|
||||
size_t num_tensors = ggml_tensor_num(params_ctx);
|
||||
if (num_tensors > 0) {
|
||||
// ggml_backend_alloc_ctx_tensors fails when all tensors are already allocated
|
||||
// (typical for memory-mapped weights). See ggml-alloc.c n_buffers==0 branch.
|
||||
bool all_have_data = true;
|
||||
for (ggml_tensor* t = ggml_get_first_tensor(params_ctx); t != nullptr; t = ggml_get_next_tensor(params_ctx, t)) {
|
||||
if (t->data == nullptr) {
|
||||
all_have_data = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (all_have_data) {
|
||||
LOG_DEBUG("%s all params already mmap-allocated (no separate buffer needed)", get_desc().c_str());
|
||||
params_buffer = nullptr;
|
||||
rebuild_params_tensor_set();
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
LOG_DEBUG("%s skipping params allocation (no tensors)", get_desc().c_str());
|
||||
return true;
|
||||
}
|
||||
// Pinned host buffer when CPU-offloaded for DMA-direct H2D.
|
||||
ggml_backend_buffer_type_t params_buft = nullptr;
|
||||
if (params_backend != runtime_backend) {
|
||||
ggml_backend_dev_t runtime_dev = ggml_backend_get_device(runtime_backend);
|
||||
if (runtime_dev != nullptr) {
|
||||
params_buft = ggml_backend_dev_host_buffer_type(runtime_dev);
|
||||
}
|
||||
}
|
||||
if (params_buft == nullptr) {
|
||||
params_buft = ggml_backend_get_default_buffer_type(params_backend);
|
||||
}
|
||||
params_buffer = ggml_backend_alloc_ctx_tensors_from_buft(params_ctx, params_buft);
|
||||
if (params_buffer == nullptr) {
|
||||
LOG_ERROR("%s alloc params backend buffer failed, num_tensors = %i",
|
||||
get_desc().c_str(),
|
||||
num_tensors);
|
||||
return false;
|
||||
}
|
||||
rebuild_params_tensor_set();
|
||||
ggml_backend_buffer_set_usage(params_buffer, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
size_t params_buffer_size = ggml_backend_buffer_get_size(params_buffer);
|
||||
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
|
||||
get_desc().c_str(),
|
||||
params_buffer_size / (1024.f * 1024.f),
|
||||
sd_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
|
||||
num_tensors);
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
void free_params_buffer() {
|
||||
if (params_buffer != nullptr) {
|
||||
ggml_backend_buffer_free(params_buffer);
|
||||
params_buffer = nullptr;
|
||||
}
|
||||
observed_max_effective_budget_ = 0;
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
if (params_buffer != nullptr) {
|
||||
return ggml_backend_buffer_get_size(params_buffer);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public:
|
||||
void free_cache_ctx_and_buffer() {
|
||||
free_cache_buffer();
|
||||
@ -2886,15 +2811,6 @@ public:
|
||||
weight_adapter = adapter;
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) {
|
||||
weight_manager = manager;
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager,
|
||||
const std::string&) {
|
||||
set_weight_manager(manager);
|
||||
}
|
||||
|
||||
void set_max_graph_vram_bytes(size_t max_vram_bytes) {
|
||||
max_graph_vram_bytes = max_vram_bytes;
|
||||
}
|
||||
@ -2902,14 +2818,6 @@ public:
|
||||
void set_stream_layers_enabled(bool enabled) {
|
||||
stream_layers_enabled = enabled;
|
||||
}
|
||||
|
||||
ggml_backend_t get_runtime_backend() {
|
||||
return runtime_backend;
|
||||
}
|
||||
|
||||
ggml_backend_t get_params_backend() {
|
||||
return params_backend;
|
||||
}
|
||||
};
|
||||
|
||||
class GGMLBlock {
|
||||
|
||||
@ -19,6 +19,7 @@ struct GenerationExtensionInitContext {
|
||||
SDVersion version;
|
||||
const String2TensorStorage& tensor_storage_map;
|
||||
ModelLoader& model_loader;
|
||||
std::shared_ptr<ModelManager> model_manager;
|
||||
int n_threads;
|
||||
std::function<bool(SDBackendModule)> ensure_backend_pair;
|
||||
std::function<ggml_backend_t(SDBackendModule)> backend_for;
|
||||
@ -46,7 +47,6 @@ struct GenerationExtension {
|
||||
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>&) {}
|
||||
virtual void collect_loras(std::vector<ModelManager::LoraSpec>&) {}
|
||||
virtual void add_ignore_tensors(std::set<std::string>&) const {}
|
||||
virtual void set_weight_manager(const std::shared_ptr<RunnerWeightManager>&) {}
|
||||
virtual void runner_done() {}
|
||||
virtual void reset_runtime_condition() {}
|
||||
virtual bool prepare_condition(GenerationExtensionConditionContext&) {
|
||||
|
||||
@ -134,11 +134,12 @@ struct PhotoMakerExtension : public GenerationExtension {
|
||||
}
|
||||
|
||||
pmid_model = std::make_shared<PhotoMakerIDEncoder>(ctx.backend_for(SDBackendModule::PHOTOMAKER),
|
||||
ctx.params_backend_for(SDBackendModule::PHOTOMAKER),
|
||||
ctx.tensor_storage_map,
|
||||
"pmid",
|
||||
ctx.version,
|
||||
pm_version);
|
||||
pm_version,
|
||||
20.f,
|
||||
ctx.model_manager);
|
||||
if (pm_version == PM_VERSION_2) {
|
||||
LOG_INFO("using PhotoMaker Version 2");
|
||||
}
|
||||
@ -174,12 +175,6 @@ struct PhotoMakerExtension : public GenerationExtension {
|
||||
ignore_tensors.insert("pmid.unet.");
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
if (pmid_model != nullptr) {
|
||||
pmid_model->set_weight_manager(manager);
|
||||
}
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
if (pmid_model != nullptr) {
|
||||
pmid_model->runner_done();
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
#include <mutex>
|
||||
#include "core/ggml_extend.hpp"
|
||||
#include "model_loader.h"
|
||||
#include "model_manager.h"
|
||||
|
||||
#define LORA_GRAPH_BASE_SIZE 10240
|
||||
|
||||
@ -14,22 +15,24 @@ struct LoraModel : public GGMLRunner {
|
||||
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
|
||||
std::set<std::string> applied_lora_tensors;
|
||||
std::string file_path;
|
||||
ModelLoader model_loader;
|
||||
bool load_failed = false;
|
||||
bool applied = false;
|
||||
bool tensor_preprocessed = false;
|
||||
std::shared_ptr<ModelManager> model_manager;
|
||||
ggml_backend_t params_backend = nullptr;
|
||||
bool load_failed = false;
|
||||
bool applied = false;
|
||||
bool tensor_preprocessed = false;
|
||||
|
||||
typedef std::function<bool(const std::string&)> filter_t;
|
||||
|
||||
LoraModel(const std::string& lora_id,
|
||||
ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const std::string& file_path = "",
|
||||
std::string prefix = "",
|
||||
SDVersion version = VERSION_COUNT)
|
||||
: lora_id(lora_id), file_path(file_path), GGMLRunner(backend, params_backend) {
|
||||
ggml_backend_t params_backend_,
|
||||
const std::string& file_path = "",
|
||||
std::string prefix = "",
|
||||
SDVersion version = VERSION_COUNT,
|
||||
std::shared_ptr<ModelManager> manager = std::make_shared<ModelManager>())
|
||||
: GGMLRunner(backend, manager), lora_id(lora_id), file_path(file_path), model_manager(std::move(manager)), params_backend(params_backend_) {
|
||||
prefix = "lora." + prefix;
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, prefix, version)) {
|
||||
if (model_manager == nullptr || !model_manager->loader().init_from_file_and_convert_name(file_path, prefix, version)) {
|
||||
load_failed = true;
|
||||
}
|
||||
}
|
||||
@ -71,7 +74,10 @@ struct LoraModel : public GGMLRunner {
|
||||
return true;
|
||||
};
|
||||
|
||||
model_loader.set_n_threads(n_threads);
|
||||
if (model_manager != nullptr) {
|
||||
model_manager->set_n_threads(n_threads);
|
||||
}
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
model_loader.load_tensors(on_new_tensor_cb);
|
||||
|
||||
if (tensors_to_create.empty()) {
|
||||
@ -88,23 +94,42 @@ struct LoraModel : public GGMLRunner {
|
||||
lora_tensors[name] = real;
|
||||
}
|
||||
|
||||
if (!alloc_params_buffer()) {
|
||||
LOG_ERROR("lora model buffer allocation failed");
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
for (const auto& pair : lora_tensors) {
|
||||
tensors[pair.first] = pair.second;
|
||||
}
|
||||
if (model_manager == nullptr ||
|
||||
!model_manager->register_param_tensors("LoRA",
|
||||
std::move(tensors),
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
runtime_backend,
|
||||
params_backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("lora model manager registration failed");
|
||||
return false;
|
||||
}
|
||||
std::vector<ggml_tensor*> lora_params;
|
||||
lora_params.reserve(lora_tensors.size());
|
||||
for (const auto& pair : lora_tensors) {
|
||||
lora_params.push_back(pair.second);
|
||||
}
|
||||
if (!model_manager->prepare_params(lora_params)) {
|
||||
LOG_ERROR("lora model manager prepare params failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
dry_run = false;
|
||||
model_loader.load_tensors(on_new_tensor_cb);
|
||||
|
||||
LOG_DEBUG("finished loaded lora");
|
||||
return true;
|
||||
}
|
||||
|
||||
void release_loaded_tensors() {
|
||||
runner_done();
|
||||
free_compute_buffer();
|
||||
free_params_buffer();
|
||||
model_manager.reset();
|
||||
free_params_ctx();
|
||||
alloc_params_ctx();
|
||||
model_manager = std::make_shared<ModelManager>();
|
||||
weight_manager = model_manager;
|
||||
lora_tensors.clear();
|
||||
original_tensor_to_final_tensor.clear();
|
||||
applied_lora_tensors.clear();
|
||||
|
||||
@ -413,13 +413,13 @@ public:
|
||||
|
||||
public:
|
||||
PhotoMakerIDEncoder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
SDVersion version = VERSION_SDXL,
|
||||
PMVersion pm_v = PM_VERSION_1,
|
||||
float sty = 20.f)
|
||||
: GGMLRunner(backend, params_backend),
|
||||
SDVersion version = VERSION_SDXL,
|
||||
PMVersion pm_v = PM_VERSION_1,
|
||||
float sty = 20.f,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
version(version),
|
||||
pm_version(pm_v),
|
||||
style_strength(sty) {
|
||||
@ -565,17 +565,18 @@ public:
|
||||
struct PhotoMakerIDEmbed : public GGMLRunner {
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
std::string file_path;
|
||||
ModelLoader* model_loader;
|
||||
bool load_failed = false;
|
||||
bool applied = false;
|
||||
std::shared_ptr<ModelManager> model_manager;
|
||||
ggml_backend_t params_backend = nullptr;
|
||||
bool load_failed = false;
|
||||
bool applied = false;
|
||||
|
||||
PhotoMakerIDEmbed(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
ModelLoader* ml,
|
||||
const std::string& file_path = "",
|
||||
const std::string& prefix = "")
|
||||
: file_path(file_path), GGMLRunner(backend, params_backend), model_loader(ml) {
|
||||
if (!model_loader->init_from_file_and_convert_name(file_path, prefix)) {
|
||||
ggml_backend_t params_backend_,
|
||||
std::shared_ptr<ModelManager> manager = std::make_shared<ModelManager>(),
|
||||
const std::string& file_path = "",
|
||||
const std::string& prefix = "")
|
||||
: GGMLRunner(backend, manager), file_path(file_path), model_manager(std::move(manager)), params_backend(params_backend_) {
|
||||
if (model_manager == nullptr || !model_manager->loader().init_from_file_and_convert_name(file_path, prefix)) {
|
||||
load_failed = true;
|
||||
}
|
||||
}
|
||||
@ -616,15 +617,27 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
||||
return true;
|
||||
};
|
||||
|
||||
model_loader->set_n_threads(n_threads);
|
||||
model_loader->load_tensors(on_new_tensor_cb);
|
||||
if (!alloc_params_buffer()) {
|
||||
LOG_ERROR("PhotoMaker ID embeds buffer allocation failed");
|
||||
model_manager->set_n_threads(n_threads);
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
model_loader.load_tensors(on_new_tensor_cb);
|
||||
if (!model_manager->register_param_tensors("PhotoMaker ID embeds",
|
||||
tensors,
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
runtime_backend,
|
||||
params_backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("PhotoMaker ID embeds model manager registration failed");
|
||||
return false;
|
||||
}
|
||||
std::vector<ggml_tensor*> id_embed_params;
|
||||
id_embed_params.reserve(tensors.size());
|
||||
for (const auto& pair : tensors) {
|
||||
id_embed_params.push_back(pair.second);
|
||||
}
|
||||
if (!model_manager->prepare_params(id_embed_params)) {
|
||||
LOG_ERROR("PhotoMaker ID embeds model manager prepare params failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
dry_run = false;
|
||||
model_loader->load_tensors(on_new_tensor_cb);
|
||||
|
||||
LOG_DEBUG("finished loading PhotoMaker ID Embeds ");
|
||||
return true;
|
||||
|
||||
@ -560,11 +560,11 @@ protected:
|
||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||
}
|
||||
|
||||
float get_alpha() {
|
||||
ggml_tensor* get_alpha(GGMLRunnerContext* ctx) {
|
||||
// image_only_indicator is always tensor([0.]) and since mix_factor.shape is [1,]
|
||||
// so learned_with_images is same as learned
|
||||
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]);
|
||||
return sigmoid(alpha);
|
||||
auto mix_factor = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["mix_factor"]);
|
||||
return ggml_sigmoid(ctx->ggml_ctx, mix_factor);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -578,11 +578,12 @@ public:
|
||||
ggml_tensor* x_spatial,
|
||||
ggml_tensor* x_temporal) {
|
||||
// image_only_indicator is always tensor([0.])
|
||||
float alpha = get_alpha();
|
||||
auto x = ggml_add(ctx->ggml_ctx,
|
||||
ggml_ext_scale(ctx->ggml_ctx, x_spatial, alpha),
|
||||
ggml_ext_scale(ctx->ggml_ctx, x_temporal, 1.0f - alpha));
|
||||
return x;
|
||||
auto alpha = get_alpha(ctx);
|
||||
return ggml_add(ctx->ggml_ctx,
|
||||
x_temporal,
|
||||
ggml_mul(ctx->ggml_ctx,
|
||||
ggml_sub(ctx->ggml_ctx, x_spatial, x_temporal),
|
||||
alpha));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -561,10 +561,10 @@ namespace Anima {
|
||||
AnimaNet net;
|
||||
|
||||
AnimaRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "model.diffusion_model")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "model.diffusion_model",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(AnimaConfig::detect_from_weights(tensor_storage_map, prefix + ".net")) {
|
||||
net = AnimaNet(config);
|
||||
net.init(params_ctx, tensor_storage_map, prefix + ".net");
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
#ifndef __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
||||
#ifndef __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
||||
#define __SD_MODEL_DIFFUSION_CONTROL_HPP__
|
||||
|
||||
#include "model/common/block.hpp"
|
||||
#include "model_loader.h"
|
||||
#include "model_manager.h"
|
||||
|
||||
#define CONTROL_NET_GRAPH_SIZE 1536
|
||||
|
||||
@ -318,13 +319,16 @@ struct ControlNet : public GGMLRunner {
|
||||
std::vector<sd::Tensor<float>> controls;
|
||||
sd::Tensor<float> guided_hint;
|
||||
bool guided_hint_cached = false;
|
||||
std::shared_ptr<ModelManager> owned_model_manager;
|
||||
ggml_backend_t params_backend = nullptr;
|
||||
|
||||
ControlNet(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
SDVersion version = VERSION_SD1,
|
||||
const std::string& prefix = "")
|
||||
: GGMLRunner(backend, params_backend), version(version), control_net(version), weight_prefix(prefix) {
|
||||
ggml_backend_t params_backend_,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
SDVersion version = VERSION_SD1,
|
||||
const std::string& prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager), version(version), control_net(version), weight_prefix(prefix), params_backend(params_backend_) {
|
||||
control_net.init(params_ctx, tensor_storage_map, prefix);
|
||||
}
|
||||
|
||||
@ -459,31 +463,35 @@ struct ControlNet : public GGMLRunner {
|
||||
|
||||
bool load_from_file(const std::string& file_path, int n_threads) {
|
||||
LOG_INFO("loading control net from '%s'", file_path.c_str());
|
||||
if (!alloc_params_buffer()) {
|
||||
LOG_ERROR("control net model buffer allocation failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
control_net.get_param_tensors(tensors);
|
||||
std::set<std::string> ignore_tensors;
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto manager = std::dynamic_pointer_cast<ModelManager>(weight_manager.lock());
|
||||
if (manager == nullptr) {
|
||||
owned_model_manager = std::make_shared<ModelManager>();
|
||||
weight_manager = owned_model_manager;
|
||||
manager = owned_model_manager;
|
||||
}
|
||||
|
||||
ModelLoader& model_loader = manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
||||
LOG_ERROR("init control net model loader from file failed: '%s'", file_path.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
model_loader.set_n_threads(n_threads);
|
||||
bool success = model_loader.load_tensors(tensors, ignore_tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load control net tensors from model loader failed");
|
||||
manager->set_n_threads(n_threads);
|
||||
if (!manager->register_param_tensors("ControlNet",
|
||||
std::move(tensors),
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
runtime_backend,
|
||||
params_backend) ||
|
||||
!manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register control net tensors with model manager failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_INFO("control net model loaded");
|
||||
return success;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -387,10 +387,10 @@ namespace ErnieImage {
|
||||
std::vector<float> pe_vec;
|
||||
|
||||
ErnieImageRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(ErnieImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
ernie_image = ErnieImageModel(config);
|
||||
ernie_image.init(params_ctx, tensor_storage_map, prefix);
|
||||
|
||||
@ -1301,12 +1301,12 @@ namespace Flux {
|
||||
bool use_mask = false;
|
||||
|
||||
FluxRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_FLUX,
|
||||
bool use_mask = false)
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_FLUX,
|
||||
bool use_mask = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(FluxConfig::detect_from_weights(tensor_storage_map, prefix, version)),
|
||||
version(version),
|
||||
use_mask(use_mask) {
|
||||
@ -1583,7 +1583,8 @@ namespace Flux {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_COUNT;
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
@ -1599,24 +1600,20 @@ namespace Flux {
|
||||
}
|
||||
|
||||
std::shared_ptr<FluxRunner> flux = std::make_shared<FluxRunner>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
VERSION_FLUX2,
|
||||
false);
|
||||
false,
|
||||
model_manager);
|
||||
|
||||
if (!flux->alloc_params_buffer()) {
|
||||
LOG_ERROR("flux model allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
flux->get_param_tensors(tensors, "model.diffusion_model");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("Flux test",
|
||||
*flux,
|
||||
"model.diffusion_model",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register flux tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
||||
#ifndef __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
||||
#define __SD_MODEL_DIFFUSION_HIDREAM_O1_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
@ -282,10 +282,10 @@ namespace HiDreamO1 {
|
||||
std::array<std::vector<float>, 4> pos_embed_weight_data_;
|
||||
|
||||
HiDreamO1VisionRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "model.visual")
|
||||
: GGMLRunner(backend, params_backend),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "model.visual",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)),
|
||||
model(std::make_shared<LLM::VisionModel>(false, config.llm.vision)) {
|
||||
model->init(params_ctx, tensor_storage_map, prefix);
|
||||
@ -343,10 +343,10 @@ namespace HiDreamO1 {
|
||||
std::vector<float> attention_mask_vec;
|
||||
|
||||
HiDreamO1Runner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "model")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "model",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(HiDreamO1Config::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
model = HiDreamO1Model(config);
|
||||
model.init(params_ctx, tensor_storage_map, prefix);
|
||||
@ -490,9 +490,9 @@ namespace HiDreamO1 {
|
||||
std::shared_ptr<HiDreamO1VisionRunner> vision_runner;
|
||||
|
||||
HiDreamO1Conditioner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {})
|
||||
: vision_runner(std::make_shared<HiDreamO1VisionRunner>(backend, params_backend, tensor_storage_map)) {}
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: vision_runner(std::make_shared<HiDreamO1VisionRunner>(backend, tensor_storage_map, "model.visual", weight_manager)) {}
|
||||
|
||||
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||
vision_runner->get_param_tensors(tensors);
|
||||
@ -510,10 +510,6 @@ namespace HiDreamO1 {
|
||||
vision_runner->set_weight_adapter(adapter);
|
||||
}
|
||||
|
||||
void set_weight_manager(const std::shared_ptr<RunnerWeightManager>& manager) override {
|
||||
vision_runner->set_weight_manager(manager);
|
||||
}
|
||||
|
||||
void runner_done() override {
|
||||
vision_runner->runner_done();
|
||||
}
|
||||
|
||||
@ -449,10 +449,10 @@ namespace Ideogram4 {
|
||||
std::vector<int32_t> image_indicator_vec;
|
||||
|
||||
Ideogram4Runner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(Ideogram4Config::detect_from_weights(tensor_storage_map, prefix)),
|
||||
uncond_prefix(prefix + ".uncond") {
|
||||
model = Ideogram4Transformer(config);
|
||||
|
||||
@ -356,10 +356,10 @@ namespace Lens {
|
||||
std::vector<float> pe_vec;
|
||||
|
||||
LensRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(LensConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
lens = LensModel(config);
|
||||
lens.init(params_ctx, tensor_storage_map, prefix);
|
||||
|
||||
@ -1686,10 +1686,10 @@ namespace LTXV {
|
||||
sd::Tensor<float> ax_input_cache;
|
||||
|
||||
LTXAVRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "model.diffusion_model")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string& prefix = "model.diffusion_model",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(LTXAVConfig::detect_from_weights(tensor_storage_map, prefix)),
|
||||
model(config) {
|
||||
model.init(params_ctx, tensor_storage_map, prefix);
|
||||
@ -2025,7 +2025,8 @@ namespace LTXV {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
LOG_INFO("loading ltxav from '%s'", model_path.c_str());
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(model_path, "model.diffusion_model.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||
return;
|
||||
@ -2040,19 +2041,18 @@ namespace LTXV {
|
||||
|
||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||
std::shared_ptr<LTXAVRunner> ltxav = std::make_shared<LTXAVRunner>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
|
||||
if (!ltxav->alloc_params_buffer()) {
|
||||
LOG_ERROR("ltxav buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
ltxav->get_param_tensors(tensors, "model.diffusion_model");
|
||||
|
||||
if (!model_loader.load_tensors(tensors)) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("LTXAV test",
|
||||
*ltxav,
|
||||
"model.diffusion_model",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register ltxav tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -879,10 +879,10 @@ struct MMDiTRunner : public DiffusionModelRunner {
|
||||
MMDiT mmdit;
|
||||
|
||||
MMDiTRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(MMDiTConfig::detect_from_weights(tensor_storage_map, prefix)),
|
||||
mmdit(config) {
|
||||
mmdit.init(params_ctx, tensor_storage_map, prefix);
|
||||
@ -1001,28 +1001,25 @@ struct MMDiTRunner : public DiffusionModelRunner {
|
||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, backend);
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, String2TensorStorage{}, "", model_manager);
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
if (!mmdit->alloc_params_buffer()) {
|
||||
LOG_ERROR("mmdit embeds buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
mmdit->get_param_tensors(tensors, "model.diffusion_model");
|
||||
|
||||
ModelLoader model_loader;
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("MMDiT test",
|
||||
*mmdit,
|
||||
"model.diffusion_model",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register mmdit tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_DIFFUSION_MODEL_HPP__
|
||||
#ifndef __SD_MODEL_DIFFUSION_MODEL_HPP__
|
||||
#define __SD_MODEL_DIFFUSION_MODEL_HPP__
|
||||
|
||||
#include <string>
|
||||
@ -7,6 +7,7 @@
|
||||
|
||||
#include "core/ggml_extend.hpp"
|
||||
#include "core/tensor_ggml.hpp"
|
||||
#include "model_manager.h"
|
||||
|
||||
struct UNetDiffusionExtra {
|
||||
int num_video_frames = -1;
|
||||
@ -88,9 +89,9 @@ protected:
|
||||
|
||||
public:
|
||||
DiffusionModelRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const std::string& prefix)
|
||||
: GGMLRunner(backend, params_backend),
|
||||
const std::string& prefix,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
prefix(prefix) {}
|
||||
|
||||
virtual sd::Tensor<float> compute(int n_threads,
|
||||
|
||||
@ -710,10 +710,10 @@ namespace Pid {
|
||||
std::vector<float> pixel_pos_comp_vec;
|
||||
|
||||
PiDRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix = "model.diffusion_model")
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const std::string prefix = "model.diffusion_model",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(PixelDiTConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
model = PixelDiT(config);
|
||||
model.init(params_ctx, tensor_storage_map, prefix);
|
||||
|
||||
@ -518,12 +518,12 @@ namespace Qwen {
|
||||
SDVersion version;
|
||||
|
||||
QwenImageRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_QWEN_IMAGE,
|
||||
bool zero_cond_t = false)
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_QWEN_IMAGE,
|
||||
bool zero_cond_t = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(QwenImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
config.zero_cond_t = config.zero_cond_t || zero_cond_t;
|
||||
qwen_image = QwenImageModel(config);
|
||||
@ -691,7 +691,8 @@ namespace Qwen {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
@ -705,23 +706,20 @@ namespace Qwen {
|
||||
}
|
||||
|
||||
std::shared_ptr<QwenImageRunner> qwen_image = std::make_shared<QwenImageRunner>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
VERSION_QWEN_IMAGE);
|
||||
VERSION_QWEN_IMAGE,
|
||||
false,
|
||||
model_manager);
|
||||
|
||||
if (!qwen_image->alloc_params_buffer()) {
|
||||
LOG_ERROR("qwen_image buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
qwen_image->get_param_tensors(tensors, "model.diffusion_model");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("Qwen image test",
|
||||
*qwen_image,
|
||||
"model.diffusion_model",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register qwen_image tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -694,11 +694,11 @@ struct UNetModelRunner : public DiffusionModelRunner {
|
||||
UnetModelBlock unet;
|
||||
|
||||
UNetModelRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
SDVersion version = VERSION_SD1)
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
SDVersion version = VERSION_SD1,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(UNetConfig::detect_from_weights(tensor_storage_map, prefix, version)),
|
||||
unet(config) {
|
||||
unet.init(params_ctx, tensor_storage_map, prefix);
|
||||
|
||||
@ -799,11 +799,11 @@ namespace WAN {
|
||||
SDVersion version;
|
||||
|
||||
WanRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_WAN2)
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_WAN2,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(WanConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
if (config.num_layers == 30) {
|
||||
if (version == VERSION_WAN2_2_TI2V) {
|
||||
@ -1017,7 +1017,8 @@ namespace WAN {
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
@ -1031,23 +1032,19 @@ namespace WAN {
|
||||
}
|
||||
|
||||
std::shared_ptr<WanRunner> wan = std::make_shared<WanRunner>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
VERSION_WAN2_2_TI2V);
|
||||
VERSION_WAN2_2_TI2V,
|
||||
model_manager);
|
||||
|
||||
if (!wan->alloc_params_buffer()) {
|
||||
LOG_ERROR("wan buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
wan->get_param_tensors(tensors, "model.diffusion_model");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("Wan test",
|
||||
*wan,
|
||||
"model.diffusion_model",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register wan tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -553,11 +553,11 @@ namespace ZImage {
|
||||
SDVersion version;
|
||||
|
||||
ZImageRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_Z_IMAGE)
|
||||
: DiffusionModelRunner(backend, params_backend, prefix),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
SDVersion version = VERSION_Z_IMAGE,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: DiffusionModelRunner(backend, prefix, weight_manager),
|
||||
config(ZImageConfig::detect_from_weights(tensor_storage_map, prefix)) {
|
||||
z_image = ZImageModel(config);
|
||||
z_image.init(params_ctx, tensor_storage_map, prefix);
|
||||
@ -698,7 +698,8 @@ namespace ZImage {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, "model.diffusion_model.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
@ -714,22 +715,19 @@ namespace ZImage {
|
||||
}
|
||||
|
||||
std::shared_ptr<ZImageRunner> z_image = std::make_shared<ZImageRunner>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
VERSION_QWEN_IMAGE);
|
||||
VERSION_QWEN_IMAGE,
|
||||
model_manager);
|
||||
|
||||
if (!z_image->alloc_params_buffer()) {
|
||||
LOG_ERROR("z_image buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
z_image->get_param_tensors(tensors, "model.diffusion_model");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("ZImage test",
|
||||
*z_image,
|
||||
"model.diffusion_model",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register z_image tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_TE_CLIP_HPP__
|
||||
#ifndef __SD_MODEL_TE_CLIP_HPP__
|
||||
#define __SD_MODEL_TE_CLIP_HPP__
|
||||
|
||||
#include "core/ggml_extend.hpp"
|
||||
@ -469,13 +469,13 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
||||
std::vector<float> attention_mask_vec;
|
||||
|
||||
CLIPTextModelRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||
bool with_final_ln = true,
|
||||
bool force_clip_f32 = false)
|
||||
: GGMLRunner(backend, params_backend) {
|
||||
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||
bool with_final_ln = true,
|
||||
bool force_clip_f32 = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager) {
|
||||
bool proj_in = false;
|
||||
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
||||
if (!starts_with(name, prefix)) {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_TE_LLM_HPP__
|
||||
#ifndef __SD_MODEL_TE_LLM_HPP__
|
||||
#define __SD_MODEL_TE_LLM_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
@ -22,6 +22,7 @@
|
||||
#include "json.hpp"
|
||||
#include "model/common/rope.hpp"
|
||||
#include "model_loader.h"
|
||||
#include "model_manager.h"
|
||||
#include "tokenizers/bpe_tokenizer.h"
|
||||
#include "tokenizers/gemma_tokenizer.h"
|
||||
#include "tokenizers/gpt_oss_tokenizer.h"
|
||||
@ -1571,11 +1572,11 @@ namespace LLM {
|
||||
public:
|
||||
LLMRunner(LLMArch arch,
|
||||
ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
bool enable_vision_ = false)
|
||||
: GGMLRunner(backend, params_backend),
|
||||
bool enable_vision_ = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
config(LLMConfig::detect_from_weights(tensor_storage_map, prefix, arch)),
|
||||
enable_vision(enable_vision_) {
|
||||
if (enable_vision && !config.have_vision_weight) {
|
||||
@ -1822,11 +1823,11 @@ namespace LLM {
|
||||
|
||||
LLMEmbedder(LLMArch arch,
|
||||
ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
bool enable_vision = false)
|
||||
: model(arch, backend, params_backend, tensor_storage_map, prefix, enable_vision) {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
bool enable_vision = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: model(arch, backend, tensor_storage_map, prefix, enable_vision, weight_manager) {
|
||||
if (arch == LLMArch::MISTRAL_SMALL_3_2 || arch == LLMArch::MINISTRAL_3_3B) {
|
||||
tokenizer = std::make_shared<MistralTokenizer>();
|
||||
} else if (arch == LLMArch::GPT_OSS_20B) {
|
||||
@ -1840,13 +1841,6 @@ namespace LLM {
|
||||
model.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
bool alloc_params_buffer() {
|
||||
if (!model.alloc_params_buffer()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::tuple<std::vector<int>, std::vector<float>> tokenize(std::string text,
|
||||
std::pair<int, int> attn_range,
|
||||
size_t max_length = 0,
|
||||
@ -2062,7 +2056,8 @@ namespace LLM {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_COUNT;
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, "text_encoders.llm.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
@ -2080,24 +2075,20 @@ namespace LLM {
|
||||
LLMArch arch = LLMArch::QWEN3;
|
||||
|
||||
std::shared_ptr<LLMEmbedder> llm = std::make_shared<LLMEmbedder>(arch,
|
||||
backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"text_encoders.llm",
|
||||
true);
|
||||
true,
|
||||
model_manager);
|
||||
|
||||
if (!llm->alloc_params_buffer()) {
|
||||
LOG_ERROR("llm model allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
llm->get_param_tensors(tensors, "text_encoders.llm");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("LLM test",
|
||||
*llm,
|
||||
"text_encoders.llm",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register llm tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_TE_T5_HPP__
|
||||
#ifndef __SD_MODEL_TE_T5_HPP__
|
||||
#define __SD_MODEL_TE_T5_HPP__
|
||||
|
||||
#include <cfloat>
|
||||
@ -12,6 +12,7 @@
|
||||
|
||||
#include "core/ggml_extend.hpp"
|
||||
#include "model_loader.h"
|
||||
#include "model_manager.h"
|
||||
#include "tokenizers/t5_unigram_tokenizer.h"
|
||||
|
||||
struct T5Config {
|
||||
@ -334,11 +335,11 @@ struct T5Runner : public GGMLRunner {
|
||||
std::vector<int> relative_position_bucket_vec;
|
||||
|
||||
T5Runner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
bool is_umt5 = false)
|
||||
: GGMLRunner(backend, params_backend),
|
||||
bool is_umt5 = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
config(T5Config::detect_from_weights(tensor_storage_map, prefix, is_umt5)) {
|
||||
model = T5(config);
|
||||
model.init(params_ctx, tensor_storage_map, prefix);
|
||||
@ -477,24 +478,17 @@ struct T5Embedder {
|
||||
T5Runner model;
|
||||
|
||||
T5Embedder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
bool is_umt5 = false)
|
||||
: model(backend, params_backend, tensor_storage_map, prefix, is_umt5), tokenizer(is_umt5) {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
bool is_umt5 = false,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: model(backend, tensor_storage_map, prefix, is_umt5, weight_manager), tokenizer(is_umt5) {
|
||||
}
|
||||
|
||||
// Delegates to the wrapped T5Runner (`model`), passing `tensors` and `prefix`
// through unchanged. `tensors` is taken by non-const reference, so the runner
// is presumably expected to add its parameter tensors to it (confirm against
// T5Runner/GGMLRunner::get_param_tensors).
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||
model.get_param_tensors(tensors, prefix);
|
||||
}
|
||||
|
||||
bool alloc_params_buffer() {
|
||||
if (!model.alloc_params_buffer()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::tuple<std::vector<int>, std::vector<float>, std::vector<float>> tokenize(std::string text,
|
||||
size_t max_length = 0,
|
||||
bool padding = false) {
|
||||
@ -579,7 +573,8 @@ struct T5Embedder {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path)) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
@ -592,19 +587,16 @@ struct T5Embedder {
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, backend, tensor_storage_map, "", true);
|
||||
std::shared_ptr<T5Embedder> t5 = std::make_shared<T5Embedder>(backend, tensor_storage_map, "", true, model_manager);
|
||||
|
||||
if (!t5->alloc_params_buffer()) {
|
||||
LOG_ERROR("t5 params buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
t5->get_param_tensors(tensors, "");
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("T5 test",
|
||||
*t5,
|
||||
"",
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register t5 tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
||||
#ifndef __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
||||
#define __SD_MODEL_UPSCALER_ESRGAN_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
@ -229,9 +229,9 @@ struct ESRGAN : public GGMLRunner {
|
||||
std::unique_ptr<RRDBNet> rrdb_net;
|
||||
|
||||
ESRGAN(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {})
|
||||
: GGMLRunner(backend, params_backend),
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
config(ESRGANConfig::detect_from_weights(tensor_storage_map)),
|
||||
rrdb_net(std::make_unique<RRDBNet>(config)) {
|
||||
rrdb_net->init(params_ctx, tensor_storage_map, "");
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
||||
#ifndef __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
||||
#define __SD_MODEL_UPSCALER_LTX_LATENT_UPSCALER_HPP__
|
||||
|
||||
#include <algorithm>
|
||||
@ -433,9 +433,9 @@ namespace LTXVUpsampler {
|
||||
std::unique_ptr<LatentUpsampler> model;
|
||||
|
||||
LatentUpsamplerRunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map)
|
||||
: GGMLRunner(backend, params_backend),
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
config(LatentUpsamplerConfig::detect_from_weights(tensor_storage_map)) {
|
||||
if (config.dims != 3 || (!config.spatial_upsample && !config.temporal_upsample) ||
|
||||
config.spatial_up_num < 1 || config.spatial_down_den < 1 || config.temporal_up_factor < 1) {
|
||||
|
||||
@ -213,9 +213,9 @@ protected:
|
||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||
}
|
||||
|
||||
float get_alpha() {
|
||||
float alpha = ggml_ext_backend_tensor_get_f32(params["mix_factor"]);
|
||||
return sigmoid(alpha);
|
||||
// Returns the blend factor as a tensor instead of a host-side float:
// the "mix_factor" parameter is passed through ggml_ext_cast_f32 (using the
// context's ggml_ctx and backend) and the result is fed to ggml_sigmoid.
// NOTE(review): ggml_ext_cast_f32 is defined elsewhere; by its name it
// presumably yields an F32 view/copy of the parameter - confirm.
ggml_tensor* get_alpha(GGMLRunnerContext* ctx) {
|
||||
auto mix_factor = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["mix_factor"]);
|
||||
return ggml_sigmoid(ctx->ggml_ctx, mix_factor);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -250,10 +250,12 @@ public:
|
||||
|
||||
x = time_stack->forward(ctx, x); // b t c (h w)
|
||||
|
||||
float alpha = get_alpha();
|
||||
x = ggml_add(ctx->ggml_ctx,
|
||||
ggml_ext_scale(ctx->ggml_ctx, x, alpha),
|
||||
ggml_ext_scale(ctx->ggml_ctx, x_mix, 1.0f - alpha));
|
||||
auto alpha = get_alpha(ctx);
|
||||
x = ggml_add(ctx->ggml_ctx,
|
||||
x_mix,
|
||||
ggml_mul(ctx->ggml_ctx,
|
||||
ggml_sub(ctx->ggml_ctx, x, x_mix),
|
||||
alpha));
|
||||
|
||||
x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w)
|
||||
x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w
|
||||
@ -664,13 +666,13 @@ struct AutoEncoderKL : public VAE {
|
||||
AutoEncoderKLModel ae;
|
||||
|
||||
AutoEncoderKL(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
bool decode_only = false,
|
||||
bool use_video_decoder = false,
|
||||
SDVersion version = VERSION_SD1)
|
||||
: VAE(version, backend, params_backend, prefix), decode_only(decode_only) {
|
||||
bool decode_only = false,
|
||||
bool use_video_decoder = false,
|
||||
SDVersion version = VERSION_SD1,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: VAE(version, backend, prefix, weight_manager), decode_only(decode_only) {
|
||||
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
|
||||
scale_factor = 0.18215f;
|
||||
shift_factor = 0.f;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#ifndef __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
||||
#ifndef __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
||||
#define __SD_MODEL_VAE_LTX_AUDIO_VAE_HPP__
|
||||
|
||||
#include <cmath>
|
||||
@ -9,6 +9,7 @@
|
||||
|
||||
#include "core/ggml_extend.hpp"
|
||||
#include "model_loader.h"
|
||||
#include "model_manager.h"
|
||||
|
||||
namespace LTXV {
|
||||
|
||||
@ -1001,10 +1002,10 @@ namespace LTXV {
|
||||
sd::Tensor<float> bwe_skip_filter_tensor;
|
||||
|
||||
LTXAudioVAERunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string& prefix = "")
|
||||
: GGMLRunner(backend, params_backend),
|
||||
const std::string& prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: GGMLRunner(backend, weight_manager),
|
||||
weight_prefix(prefix),
|
||||
config(LTXAudioVAEConfig::detect_from_weights(tensor_storage_map)),
|
||||
model(config) {
|
||||
@ -1019,7 +1020,7 @@ namespace LTXV {
|
||||
model.get_param_tensors(tensors, weight_prefix);
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
size_t get_params_mem_size() {
|
||||
return model.get_params_mem_size();
|
||||
}
|
||||
|
||||
@ -1066,7 +1067,8 @@ namespace LTXV {
|
||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file(model_path)) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||
return;
|
||||
@ -1074,20 +1076,17 @@ namespace LTXV {
|
||||
|
||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||
auto ltx_audio_vae = std::make_shared<LTXAudioVAERunner>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
prefix);
|
||||
prefix,
|
||||
model_manager);
|
||||
|
||||
if (!ltx_audio_vae->alloc_params_buffer()) {
|
||||
LOG_ERROR("ltx audio vae buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
ltx_audio_vae->get_param_tensors(tensors);
|
||||
|
||||
if (!model_loader.load_tensors(tensors)) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("LTX audio VAE test",
|
||||
*ltx_audio_vae,
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register ltx audio vae tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -957,8 +957,8 @@ namespace LTXVAE {
|
||||
|
||||
ggml_tensor* scaled_timestep = timestep;
|
||||
if (timestep_conditioning) {
|
||||
auto multiplier = ggml_ext_backend_tensor_get_f32(params["timestep_scale_multiplier"]);
|
||||
scaled_timestep = ggml_ext_scale(ctx->ggml_ctx, timestep, multiplier);
|
||||
auto multiplier = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["timestep_scale_multiplier"]);
|
||||
scaled_timestep = ggml_mul(ctx->ggml_ctx, timestep, multiplier);
|
||||
}
|
||||
|
||||
x = conv_in->forward(ctx, x, causal_decoder);
|
||||
@ -1008,8 +1008,8 @@ namespace LTXVAE {
|
||||
|
||||
ggml_tensor* scaled_timestep = timestep;
|
||||
if (timestep_conditioning && timestep != nullptr) {
|
||||
auto multiplier = ggml_ext_backend_tensor_get_f32(params["timestep_scale_multiplier"]);
|
||||
scaled_timestep = ggml_ext_scale(ctx->ggml_ctx, timestep, multiplier);
|
||||
auto multiplier = ggml_ext_cast_f32(ctx->ggml_ctx, ctx->backend, params["timestep_scale_multiplier"]);
|
||||
scaled_timestep = ggml_mul(ctx->ggml_ctx, timestep, multiplier);
|
||||
}
|
||||
|
||||
// conv_in with feat_map for left temporal context
|
||||
@ -1223,11 +1223,11 @@ struct LTXVideoVAE : public VAE {
|
||||
LTXVAE::VideoVAE vae;
|
||||
|
||||
LTXVideoVAE(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string& prefix,
|
||||
bool decode_only = true,
|
||||
SDVersion version = VERSION_LTXAV)
|
||||
bool decode_only = true,
|
||||
SDVersion version = VERSION_LTXAV,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: decode_only(decode_only),
|
||||
ltx_vae_version(LTXVAE::detect_ltx_vae_version(tensor_storage_map, prefix)),
|
||||
timestep_conditioning(LTXVAE::detect_ltx_vae_timestep_conditioning(tensor_storage_map, prefix)),
|
||||
@ -1239,7 +1239,7 @@ struct LTXVideoVAE : public VAE {
|
||||
patch_size,
|
||||
tensor_storage_map,
|
||||
prefix),
|
||||
VAE(version, backend, params_backend, prefix) {
|
||||
VAE(version, backend, prefix, weight_manager) {
|
||||
vae.init(params_ctx, tensor_storage_map, prefix);
|
||||
decode_timestep_tensor.values()[0] = vae.decode_timestep;
|
||||
}
|
||||
@ -1521,7 +1521,8 @@ struct LTXVideoVAE : public VAE {
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
|
||||
|
||||
ModelLoader model_loader;
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(model_path, "vae.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", model_path.c_str());
|
||||
return;
|
||||
@ -1529,22 +1530,19 @@ struct LTXVideoVAE : public VAE {
|
||||
|
||||
auto& tensor_storage_map = model_loader.get_tensor_storage_map();
|
||||
std::shared_ptr<LTXVideoVAE> vae = std::make_shared<LTXVideoVAE>(backend,
|
||||
backend,
|
||||
tensor_storage_map,
|
||||
"first_stage_model",
|
||||
true,
|
||||
VERSION_LTXAV);
|
||||
VERSION_LTXAV,
|
||||
model_manager);
|
||||
|
||||
if (!vae->alloc_params_buffer()) {
|
||||
LOG_ERROR("vae buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
vae->get_param_tensors(tensors);
|
||||
|
||||
if (!model_loader.load_tensors(tensors)) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("LTX VAE test",
|
||||
*vae,
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register ltx vae tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -623,12 +623,12 @@ struct TinyImageAutoEncoder : public VAE {
|
||||
bool decode_only = false;
|
||||
|
||||
TinyImageAutoEncoder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
bool decoder_only = true,
|
||||
SDVersion version = VERSION_SD1)
|
||||
: VAE(version, backend, params_backend, "tae"),
|
||||
bool decoder_only = true,
|
||||
SDVersion version = VERSION_SD1,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: VAE(version, backend, "tae", weight_manager),
|
||||
decode_only(decoder_only),
|
||||
taesd(decoder_only, version) {
|
||||
scale_input = false;
|
||||
@ -686,12 +686,12 @@ struct TinyVideoAutoEncoder : public VAE {
|
||||
bool is_wide = false;
|
||||
|
||||
TinyVideoAutoEncoder(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map,
|
||||
const std::string prefix,
|
||||
bool decoder_only = true,
|
||||
SDVersion version = VERSION_WAN2)
|
||||
: VAE(version, backend, params_backend, "tae"),
|
||||
bool decoder_only = true,
|
||||
SDVersion version = VERSION_WAN2,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: VAE(version, backend, "tae", weight_manager),
|
||||
decode_only(decoder_only) {
|
||||
for (auto tensor_storage : tensor_storage_map) {
|
||||
if (tensor_storage.first.find(prefix + ".3.conv.6.weight") != std::string::npos) {
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
#ifndef __SD_MODEL_VAE_VAE_HPP__
|
||||
#ifndef __SD_MODEL_VAE_VAE_HPP__
|
||||
#define __SD_MODEL_VAE_VAE_HPP__
|
||||
|
||||
#include "core/tensor_ggml.hpp"
|
||||
#include "model/common/block.hpp"
|
||||
#include "model_manager.h"
|
||||
|
||||
struct VAE : public GGMLRunner {
|
||||
protected:
|
||||
@ -63,8 +64,11 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
VAE(SDVersion version, ggml_backend_t backend, ggml_backend_t params_backend, const std::string& weight_prefix = "")
|
||||
: version(version), weight_prefix(weight_prefix), GGMLRunner(backend, params_backend) {}
|
||||
// Constructs the VAE base: stores `version` and `weight_prefix` and forwards
// `backend` and `weight_manager` to the GGMLRunner base constructor.
// `weight_prefix` defaults to "" and `weight_manager` defaults to nullptr.
// NOTE: GGMLRunner is written last in the initializer list, but C++ always
// initializes base classes before members, so it is actually constructed
// first (compilers may emit a -Wreorder warning for this ordering).
VAE(SDVersion version,
|
||||
ggml_backend_t backend,
|
||||
const std::string& weight_prefix = "",
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: version(version), weight_prefix(weight_prefix), GGMLRunner(backend, weight_manager) {}
|
||||
|
||||
int get_scale_factor() {
|
||||
int scale_factor = 8;
|
||||
@ -224,8 +228,10 @@ public:
|
||||
};
|
||||
|
||||
struct FakeVAE : public VAE {
|
||||
FakeVAE(SDVersion version, ggml_backend_t backend, ggml_backend_t params_backend)
|
||||
: VAE(version, backend, params_backend) {}
|
||||
// Constructs a FakeVAE by forwarding to the VAE base constructor with an
// empty weight prefix (""); `weight_manager` defaults to nullptr.
FakeVAE(SDVersion version,
|
||||
ggml_backend_t backend,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: VAE(version, backend, "", weight_manager) {}
|
||||
|
||||
int get_encoder_output_channels(int input_channels) {
|
||||
return input_channels;
|
||||
|
||||
@ -1124,12 +1124,12 @@ namespace WAN {
|
||||
WanVAE ae;
|
||||
|
||||
WanVAERunner(ggml_backend_t backend,
|
||||
ggml_backend_t params_backend,
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
bool decode_only = false,
|
||||
SDVersion version = VERSION_WAN2)
|
||||
: VAE(version, backend, params_backend, prefix), decode_only(decode_only), ae(decode_only, version == VERSION_WAN2_2_TI2V) {
|
||||
const String2TensorStorage& tensor_storage_map = {},
|
||||
const std::string prefix = "",
|
||||
bool decode_only = false,
|
||||
SDVersion version = VERSION_WAN2,
|
||||
std::shared_ptr<RunnerWeightManager> weight_manager = nullptr)
|
||||
: VAE(version, backend, prefix, weight_manager), decode_only(decode_only), ae(decode_only, version == VERSION_WAN2_2_TI2V) {
|
||||
ae.init(params_ctx, tensor_storage_map, prefix);
|
||||
}
|
||||
|
||||
@ -1327,27 +1327,24 @@ namespace WAN {
|
||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||
ggml_backend_t backend = sd_backend_cpu_init();
|
||||
ggml_type model_data_type = GGML_TYPE_F16;
|
||||
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, backend, String2TensorStorage{}, "first_stage_model", false, VERSION_WAN2_2_TI2V);
|
||||
auto model_manager = std::make_shared<ModelManager>();
|
||||
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, String2TensorStorage{}, "first_stage_model", false, VERSION_WAN2_2_TI2V, model_manager);
|
||||
{
|
||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||
|
||||
if (!vae->alloc_params_buffer()) {
|
||||
LOG_ERROR("vae buffer allocation failed");
|
||||
return;
|
||||
}
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
vae->get_param_tensors(tensors);
|
||||
|
||||
ModelLoader model_loader;
|
||||
ModelLoader& model_loader = model_manager->loader();
|
||||
if (!model_loader.init_from_file_and_convert_name(file_path, "vae.")) {
|
||||
LOG_ERROR("init model loader from file failed: '%s'", file_path.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
bool success = model_loader.load_tensors(tensors);
|
||||
|
||||
if (!success) {
|
||||
LOG_ERROR("load tensors from model loader failed");
|
||||
if (!model_manager->register_runner_params("Wan VAE test",
|
||||
*vae,
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
backend,
|
||||
backend) ||
|
||||
!model_manager->validate_registered_tensors()) {
|
||||
LOG_ERROR("register wan vae tensors with model manager failed");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -121,6 +121,42 @@ public:
|
||||
ggml_backend_t compute_backend,
|
||||
ggml_backend_t params_backend,
|
||||
size_t* registered_tensor_size = nullptr);
|
||||
|
||||
// Convenience overload for runners whose get_param_tensors() takes only the
// output map. Builds a local name -> tensor map, lets `runner` fill it via
// runner.get_param_tensors(tensors), then moves the map into
// register_param_tensors together with the remaining arguments.
//
// desc, residency_mode, compute_backend, params_backend and
// registered_tensor_size are forwarded unchanged. Returns whatever
// register_param_tensors returns.
// NOTE(review): registered_tensor_size looks like an optional out-parameter
// (defaults to nullptr); its exact meaning is defined by
// register_param_tensors - confirm there.
template <typename Runner>
|
||||
bool register_runner_params(const std::string& desc,
|
||||
Runner& runner,
|
||||
ResidencyMode residency_mode,
|
||||
ggml_backend_t compute_backend,
|
||||
ggml_backend_t params_backend,
|
||||
size_t* registered_tensor_size = nullptr) {
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
runner.get_param_tensors(tensors);
|
||||
return register_param_tensors(desc,
|
||||
std::move(tensors),
|
||||
residency_mode,
|
||||
compute_backend,
|
||||
params_backend,
|
||||
registered_tensor_size);
|
||||
}
|
||||
|
||||
// Overload for runners whose get_param_tensors() also takes a name prefix.
// Builds a local name -> tensor map, lets `runner` fill it via
// runner.get_param_tensors(tensors, prefix), then moves the map into
// register_param_tensors together with the remaining arguments.
//
// `prefix` is only handed to the runner; it is not passed on to
// register_param_tensors. All other arguments are forwarded unchanged.
// Returns whatever register_param_tensors returns.
// NOTE(review): registered_tensor_size looks like an optional out-parameter
// (defaults to nullptr); its exact meaning is defined by
// register_param_tensors - confirm there.
template <typename Runner>
|
||||
bool register_runner_params(const std::string& desc,
|
||||
Runner& runner,
|
||||
const std::string& prefix,
|
||||
ResidencyMode residency_mode,
|
||||
ggml_backend_t compute_backend,
|
||||
ggml_backend_t params_backend,
|
||||
size_t* registered_tensor_size = nullptr) {
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
runner.get_param_tensors(tensors, prefix);
|
||||
return register_param_tensors(desc,
|
||||
std::move(tensors),
|
||||
residency_mode,
|
||||
compute_backend,
|
||||
params_backend,
|
||||
registered_tensor_size);
|
||||
}
|
||||
|
||||
bool validate_registered_tensors();
|
||||
|
||||
bool prepare_params(const std::vector<ggml_tensor*>& tensors) override;
|
||||
|
||||
@ -241,7 +241,6 @@ public:
|
||||
}
|
||||
std::map<std::string, ggml_tensor*> group_tensors;
|
||||
model->get_param_tensors(group_tensors);
|
||||
model->set_weight_manager(model_manager);
|
||||
if (model_manager == nullptr) {
|
||||
return true;
|
||||
}
|
||||
@ -586,33 +585,35 @@ public:
|
||||
|
||||
if (sd_version_is_sd3(version)) {
|
||||
cond_stage_model = std::make_shared<SD3CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<MMDiTRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
} else if (sd_version_is_pid(version)) {
|
||||
vae_decode_only = false;
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version);
|
||||
diffusion_model = std::make_shared<Pid::PiDRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model.net");
|
||||
} else if (sd_version_is_ideogram4(version)) {
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version,
|
||||
"",
|
||||
false);
|
||||
false,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Pid::PiDRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model.net",
|
||||
model_manager);
|
||||
} else if (sd_version_is_ideogram4(version)) {
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version,
|
||||
"",
|
||||
false,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Ideogram4::Ideogram4Runner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
} else if (sd_version_is_flux(version)) {
|
||||
bool is_chroma = false;
|
||||
for (auto pair : tensor_storage_map) {
|
||||
@ -623,66 +624,71 @@ public:
|
||||
}
|
||||
if (is_chroma) {
|
||||
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
sd_ctx_params->chroma_use_t5_mask,
|
||||
sd_ctx_params->chroma_t5_mask_pad);
|
||||
sd_ctx_params->chroma_t5_mask_pad,
|
||||
false,
|
||||
model_manager);
|
||||
} else if (version == VERSION_OVIS_IMAGE) {
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version,
|
||||
"",
|
||||
false);
|
||||
false,
|
||||
model_manager);
|
||||
} else {
|
||||
cond_stage_model = std::make_shared<FluxCLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
model_manager);
|
||||
}
|
||||
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version,
|
||||
sd_ctx_params->chroma_use_dit_mask);
|
||||
sd_ctx_params->chroma_use_dit_mask,
|
||||
model_manager);
|
||||
} else if (sd_version_is_flux2(version)) {
|
||||
bool is_chroma = false;
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version);
|
||||
version,
|
||||
"",
|
||||
false,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version,
|
||||
sd_ctx_params->chroma_use_dit_mask);
|
||||
sd_ctx_params->chroma_use_dit_mask,
|
||||
model_manager);
|
||||
} else if (sd_version_is_ltxav(version)) {
|
||||
cond_stage_model = std::make_shared<LTXAVEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
"text_encoders.llm",
|
||||
"text_embedding_projection",
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<LTXV::LTXAVRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
} else if (sd_version_is_wan(version)) {
|
||||
cond_stage_model = std::make_shared<T5CLIPEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
true,
|
||||
0,
|
||||
true);
|
||||
true,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
if (strlen(SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path)) > 0) {
|
||||
high_noise_diffusion_model = std::make_shared<WAN::WanRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.high_noise_diffusion_model",
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
}
|
||||
if (diffusion_model->get_desc() == "Wan2.1-I2V-14B" ||
|
||||
diffusion_model->get_desc() == "Wan2.1-FLF2V-14B" ||
|
||||
@ -691,8 +697,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend_for(SDBackendModule::CLIP_VISION),
|
||||
params_backend_for(SDBackendModule::CLIP_VISION),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
model_manager);
|
||||
clip_vision->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
||||
if (!register_runner_params("CLIP vision",
|
||||
clip_vision,
|
||||
@ -706,93 +712,99 @@ public:
|
||||
enable_vision = true;
|
||||
}
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version,
|
||||
"",
|
||||
enable_vision);
|
||||
enable_vision,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Qwen::QwenImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version,
|
||||
sd_ctx_params->qwen_image_zero_cond_t);
|
||||
sd_ctx_params->qwen_image_zero_cond_t,
|
||||
model_manager);
|
||||
} else if (sd_version_is_longcat(version)) {
|
||||
bool enable_vision = false;
|
||||
if (!vae_decode_only) {
|
||||
enable_vision = true;
|
||||
}
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version,
|
||||
"",
|
||||
enable_vision);
|
||||
enable_vision,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Flux::FluxRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version,
|
||||
sd_ctx_params->chroma_use_dit_mask);
|
||||
sd_ctx_params->chroma_use_dit_mask,
|
||||
model_manager);
|
||||
} else if (version == VERSION_HIDREAM_O1) {
|
||||
cond_stage_model = std::make_shared<HiDreamO1::HiDreamO1Conditioner>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<HiDreamO1::HiDreamO1Runner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model");
|
||||
"model",
|
||||
model_manager);
|
||||
} else if (sd_version_is_anima(version)) {
|
||||
cond_stage_model = std::make_shared<AnimaConditioner>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Anima::AnimaRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
} else if (sd_version_is_z_image(version)) {
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version);
|
||||
version,
|
||||
"",
|
||||
false,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<ZImage::ZImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
} else if (sd_version_is_ernie_image(version)) {
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version);
|
||||
version,
|
||||
"",
|
||||
false,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<ErnieImage::ErnieImageRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
} else if (sd_version_is_lens(version)) {
|
||||
cond_stage_model = std::make_shared<LLMEmbedder>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
version);
|
||||
version,
|
||||
"",
|
||||
false,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<Lens::LensRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model");
|
||||
"model.diffusion_model",
|
||||
model_manager);
|
||||
} else { // SD1.x SD2.x SDXL
|
||||
std::map<std::string, std::string> embbeding_map;
|
||||
for (uint32_t i = 0; i < sd_ctx_params->embedding_count; i++) {
|
||||
embbeding_map.emplace(SAFE_STR(sd_ctx_params->embeddings[i].name), SAFE_STR(sd_ctx_params->embeddings[i].path));
|
||||
}
|
||||
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend_for(SDBackendModule::TE),
|
||||
params_backend_for(SDBackendModule::TE),
|
||||
tensor_storage_map,
|
||||
embbeding_map,
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
diffusion_model = std::make_shared<UNetModelRunner>(backend_for(SDBackendModule::DIFFUSION),
|
||||
params_backend_for(SDBackendModule::DIFFUSION),
|
||||
tensor_storage_map,
|
||||
"model.diffusion_model",
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
if (sd_ctx_params->diffusion_conv_direct) {
|
||||
LOG_INFO("Using Conv2d direct in the diffusion model");
|
||||
diffusion_model->set_conv2d_direct_enabled(true);
|
||||
@ -841,19 +853,19 @@ public:
|
||||
sd_version_is_anima(version) ||
|
||||
sd_version_is_ltxav(version)) {
|
||||
return std::make_shared<TinyVideoAutoEncoder>(backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE),
|
||||
tensor_storage_map,
|
||||
"decoder",
|
||||
vae_decode_only,
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
|
||||
} else {
|
||||
auto model = std::make_shared<TinyImageAutoEncoder>(backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE),
|
||||
tensor_storage_map,
|
||||
"decoder.layers",
|
||||
vae_decode_only,
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
return model;
|
||||
}
|
||||
};
|
||||
@ -871,28 +883,28 @@ public:
|
||||
auto create_vae = [&]() -> std::shared_ptr<VAE> {
|
||||
if (sd_version_is_ltxav(version)) {
|
||||
return std::make_shared<LTXVideoVAE>(backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE),
|
||||
tensor_storage_map,
|
||||
"first_stage_model",
|
||||
vae_decode_only,
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
} else if (sd_version_is_wan(version) ||
|
||||
sd_version_is_qwen_image(version) ||
|
||||
sd_version_is_anima(version)) {
|
||||
return std::make_shared<WAN::WanVAERunner>(backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE),
|
||||
tensor_storage_map,
|
||||
"first_stage_model",
|
||||
vae_decode_only,
|
||||
version);
|
||||
version,
|
||||
model_manager);
|
||||
} else {
|
||||
auto model = std::make_shared<AutoEncoderKL>(backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE),
|
||||
tensor_storage_map,
|
||||
"first_stage_model",
|
||||
vae_decode_only,
|
||||
false,
|
||||
vae_version);
|
||||
vae_version,
|
||||
model_manager);
|
||||
if (sd_version_is_sdxl(version) &&
|
||||
(strlen(SAFE_STR(sd_ctx_params->vae_path)) == 0 || sd_ctx_params->force_sdxl_vae_conv_scale || external_vae_is_invalid)) {
|
||||
float vae_conv_2d_scale = 1.f / 32.f;
|
||||
@ -910,7 +922,7 @@ public:
|
||||
LOG_INFO("using FakeVAE");
|
||||
first_stage_model = std::make_shared<FakeVAE>(version,
|
||||
backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE));
|
||||
model_manager);
|
||||
if (!register_runner_params("VAE",
|
||||
first_stage_model,
|
||||
SDBackendModule::VAE,
|
||||
@ -952,8 +964,9 @@ public:
|
||||
|
||||
if (use_audio_vae) {
|
||||
audio_vae_model = std::make_shared<LTXV::LTXAudioVAERunner>(backend_for(SDBackendModule::VAE),
|
||||
params_backend_for(SDBackendModule::VAE),
|
||||
tensor_storage_map);
|
||||
tensor_storage_map,
|
||||
"",
|
||||
model_manager);
|
||||
if (!register_runner_params("LTX audio VAE",
|
||||
audio_vae_model,
|
||||
SDBackendModule::VAE,
|
||||
@ -977,7 +990,9 @@ public:
|
||||
control_net = std::make_shared<ControlNet>(backend_for(SDBackendModule::CONTROL_NET),
|
||||
params_backend_for(SDBackendModule::CONTROL_NET),
|
||||
model_loader.get_tensor_storage_map(),
|
||||
version);
|
||||
version,
|
||||
"",
|
||||
model_manager);
|
||||
if (sd_ctx_params->diffusion_conv_direct) {
|
||||
LOG_INFO("Using Conv2d direct in the control net");
|
||||
control_net->set_conv2d_direct_enabled(true);
|
||||
@ -998,6 +1013,7 @@ public:
|
||||
version,
|
||||
tensor_storage_map,
|
||||
model_loader,
|
||||
model_manager,
|
||||
n_threads,
|
||||
[this](SDBackendModule module) { return ensure_backend_pair(module); },
|
||||
[this](SDBackendModule module) { return backend_for(module); },
|
||||
@ -5008,8 +5024,8 @@ static sd::Tensor<float> upscale_ltx_spatial_video_latent(sd_ctx_t* sd_ctx,
|
||||
|
||||
std::unique_ptr<LTXVUpsampler::LatentUpsamplerRunner> upsampler =
|
||||
std::make_unique<LTXVUpsampler::LatentUpsamplerRunner>(sd_ctx->sd->backend_for(SDBackendModule::UPSCALER),
|
||||
sd_ctx->sd->params_backend_for(SDBackendModule::UPSCALER),
|
||||
model_loader.get_tensor_storage_map());
|
||||
model_loader.get_tensor_storage_map(),
|
||||
upsampler_manager);
|
||||
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
|
||||
upsampler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
||||
if (upsampler->model == nullptr) {
|
||||
@ -5019,7 +5035,6 @@ static sd::Tensor<float> upscale_ltx_spatial_video_latent(sd_ctx_t* sd_ctx,
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
upsampler->get_param_tensors(tensors);
|
||||
upsampler->set_weight_manager(upsampler_manager);
|
||||
if (!upsampler_manager->register_param_tensors("LTX latent upsampler",
|
||||
std::move(tensors),
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
|
||||
@ -90,8 +90,8 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
||||
model_loader.set_wtype_override(model_data_type);
|
||||
LOG_INFO("Upscaler weight type: %s", ggml_type_name(model_data_type));
|
||||
esrgan_upscaler = std::make_shared<ESRGAN>(backend_for(SDBackendModule::UPSCALER),
|
||||
params_backend_for(SDBackendModule::UPSCALER),
|
||||
model_loader.get_tensor_storage_map());
|
||||
model_loader.get_tensor_storage_map(),
|
||||
model_manager);
|
||||
if (esrgan_upscaler == nullptr || esrgan_upscaler->rrdb_net == nullptr) {
|
||||
LOG_ERROR("init esrgan model from metadata failed: '%s'", esrgan_path.c_str());
|
||||
return false;
|
||||
@ -104,7 +104,6 @@ bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
||||
|
||||
std::map<std::string, ggml_tensor*> tensors;
|
||||
esrgan_upscaler->get_param_tensors(tensors);
|
||||
esrgan_upscaler->set_weight_manager(model_manager);
|
||||
if (!model_manager->register_param_tensors("ESRGAN",
|
||||
std::move(tensors),
|
||||
ModelManager::ResidencyMode::Resident,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user