add mmproj file support

leejet 2025-10-10 01:09:05 +08:00
parent 887055edce
commit 9fa817f014
5 changed files with 161 additions and 41 deletions

File 1 of 5

@@ -60,6 +60,7 @@ struct SDParams {
     std::string clip_vision_path;
     std::string t5xxl_path;
     std::string qwen2vl_path;
+    std::string qwen2vl_vision_path;
     std::string diffusion_model_path;
     std::string high_noise_diffusion_model_path;
     std::string vae_path;
@@ -146,6 +147,7 @@ void print_params(SDParams params) {
     printf(" clip_vision_path: %s\n", params.clip_vision_path.c_str());
     printf(" t5xxl_path: %s\n", params.t5xxl_path.c_str());
     printf(" qwen2vl_path: %s\n", params.qwen2vl_path.c_str());
+    printf(" qwen2vl_vision_path: %s\n", params.qwen2vl_vision_path.c_str());
     printf(" diffusion_model_path: %s\n", params.diffusion_model_path.c_str());
     printf(" high_noise_diffusion_model_path: %s\n", params.high_noise_diffusion_model_path.c_str());
     printf(" vae_path: %s\n", params.vae_path.c_str());
@@ -218,6 +220,7 @@ void print_usage(int argc, const char* argv[]) {
     printf(" --clip_vision path to the clip-vision encoder\n");
     printf(" --t5xxl path to the t5xxl text encoder\n");
     printf(" --qwen2vl path to the qwen2vl text encoder\n");
+    printf(" --qwen2vl_vision path to the qwen2vl vit\n");
     printf(" --vae [VAE] path to vae\n");
     printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
     printf(" --control-net [CONTROL_PATH] path to control net model\n");
@@ -488,6 +491,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
     {"", "--clip_vision", "", &params.clip_vision_path},
     {"", "--t5xxl", "", &params.t5xxl_path},
     {"", "--qwen2vl", "", &params.qwen2vl_path},
+    {"", "--qwen2vl_vision", "", &params.qwen2vl_vision_path},
     {"", "--diffusion-model", "", &params.diffusion_model_path},
     {"", "--high-noise-diffusion-model", "", &params.high_noise_diffusion_model_path},
     {"", "--vae", "", &params.vae_path},
@@ -947,7 +951,7 @@ std::string get_image_params(SDParams params, int64_t seed) {
         parameter_string += " " + std::string(sd_schedule_name(params.sample_params.scheduler));
     }
     parameter_string += ", ";
-    for (const auto& te : {params.clip_l_path, params.clip_g_path, params.t5xxl_path, params.qwen2vl_path}) {
+    for (const auto& te : {params.clip_l_path, params.clip_g_path, params.t5xxl_path, params.qwen2vl_path, params.qwen2vl_vision_path}) {
         if (!te.empty()) {
             parameter_string += "TE: " + sd_basename(te) + ", ";
         }
@@ -1322,6 +1326,7 @@ int main(int argc, const char* argv[]) {
         params.clip_vision_path.c_str(),
         params.t5xxl_path.c_str(),
        params.qwen2vl_path.c_str(),
+        params.qwen2vl_vision_path.c_str(),
        params.diffusion_model_path.c_str(),
        params.high_noise_diffusion_model_path.c_str(),
        params.vae_path.c_str(),
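Taken together, the changes in this file thread the new qwen2vl_vision_path option through the CLI: a struct field, a print_params line, a usage line, and a parser table entry. A hypothetical invocation pairing the text encoder with a separate mmproj vision file might look like this (model filenames are illustrative only, not part of this commit):

    sd --diffusion-model qwen-image.gguf \
       --qwen2vl qwen2.5-vl-7b.gguf \
       --qwen2vl_vision mmproj-qwen2.5-vl-7b.gguf \
       -p "a photo of a cat" -o output.png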

File 2 of 5

@@ -211,6 +211,24 @@ std::unordered_map<std::string, std::string> qwenvl_name_map{
     {"output_norm.", "model.norm."},
 };

+std::unordered_map<std::string, std::string> qwenvl_vision_name_map{
+    {"mm.", "merger.mlp."},
+    {"v.post_ln.", "merger.ln_q."},
+    {"v.patch_embd.weight", "patch_embed.proj.0.weight"},
+    {"patch_embed.proj.0.weight.1", "patch_embed.proj.1.weight"},
+    {"v.patch_embd.weight.1", "patch_embed.proj.1.weight"},
+    {"v.blk.", "blocks."},
+    {"attn_q.", "attn.q_proj."},
+    {"attn_k.", "attn.k_proj."},
+    {"attn_v.", "attn.v_proj."},
+    {"attn_out.", "attn.proj."},
+    {"ffn_down.", "mlp.down_proj."},
+    {"ffn_gate.", "mlp.gate_proj."},
+    {"ffn_up.", "mlp.up_proj."},
+    {"ln1.", "norm1."},
+    {"ln2.", "norm2."},
+};
+
 std::string convert_cond_model_name(const std::string& name) {
     std::string new_name = name;
     std::string prefix;
@@ -269,12 +287,21 @@ std::string convert_cond_model_name(const std::string& name) {
             new_name.replace(pos, 11, "layer.0.SelfAttention.relative_attention_bias.");
         }
     } else if (contains(name, "qwen2vl")) {
+        if (contains(name, "qwen2vl.visual")) {
+            for (auto kv : qwenvl_vision_name_map) {
+                size_t pos = new_name.find(kv.first);
+                if (pos != std::string::npos) {
+                    new_name.replace(pos, kv.first.size(), kv.second);
+                }
+            }
+        } else {
             for (auto kv : qwenvl_name_map) {
                 size_t pos = new_name.find(kv.first);
                 if (pos != std::string::npos) {
                     new_name.replace(pos, kv.first.size(), kv.second);
                 }
             }
+        }
     } else if (name == "text_encoders.t5xxl.transformer.token_embd.weight") {
         new_name = "text_encoders.t5xxl.transformer.shared.weight";
     }
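A few worked examples of the rewrite (tensor names assumed for illustration; each map entry is a substring replacement, and the loop applies every entry that matches):

    // "v.blk.0.attn_q.weight" -> "blocks.0.attn.q_proj.weight"
    // "v.post_ln.weight"      -> "merger.ln_q.weight"
    // "mm.0.weight"           -> "merger.mlp.0.weight"

Note the two-step chain for the patch embedding: std::unordered_map iterates in an unspecified order, so the extra "patch_embed.proj.0.weight.1" entry guarantees that "v.patch_embd.weight.1" still ends up at "patch_embed.proj.1.weight" even when the shorter "v.patch_embd.weight" rule fires first.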

File 3 of 5

@@ -363,16 +363,39 @@ namespace Qwen {
     struct Qwen2_5_VisionPatchEmbed : public GGMLBlock {
     protected:
-        int64_t patch_size;
-        int64_t temporal_patch_size;
+        bool llama_cpp_style;
+        int patch_size;
+        int temporal_patch_size;
+        int64_t in_channels;
         int64_t embed_dim;

     public:
-        Qwen2_5_VisionPatchEmbed(int64_t patch_size = 14,
-                                 int64_t temporal_patch_size = 2,
+        Qwen2_5_VisionPatchEmbed(bool llama_cpp_style,
+                                 int patch_size = 14,
+                                 int temporal_patch_size = 2,
                                  int64_t in_channels = 3,
                                  int64_t embed_dim = 1152)
-            : patch_size(patch_size), temporal_patch_size(temporal_patch_size), embed_dim(embed_dim) {
+            : llama_cpp_style(llama_cpp_style),
+              patch_size(patch_size),
+              temporal_patch_size(temporal_patch_size),
+              in_channels(in_channels),
+              embed_dim(embed_dim) {
+            if (llama_cpp_style) {
+                blocks["proj.0"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels,
+                                                                         embed_dim,
+                                                                         {patch_size, patch_size},
+                                                                         {patch_size, patch_size},  // stride
+                                                                         {0, 0},                    // padding
+                                                                         {1, 1},                    // dilation
+                                                                         false));
+                blocks["proj.1"] = std::shared_ptr<GGMLBlock>(new Conv2d(in_channels,
+                                                                         embed_dim,
+                                                                         {patch_size, patch_size},
+                                                                         {patch_size, patch_size},  // stride
+                                                                         {0, 0},                    // padding
+                                                                         {1, 1},                    // dilation
+                                                                         false));
+            } else {
                 std::tuple<int, int, int> kernel_size = {(int)temporal_patch_size, (int)patch_size, (int)patch_size};
                 blocks["proj"] = std::shared_ptr<GGMLBlock>(new Conv3d(in_channels,
                                                                        embed_dim,
@@ -382,19 +405,37 @@ namespace Qwen {
                                                                        {1, 1, 1},  // dilation
                                                                        false));
             }
+        }

         struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
             // x: [N*grid_t*grid_h*grid_w, in_channels, temporal_patch_size*patch_size*patch_size]
             // return: [N*grid_t*grid_h*grid_w, embed_dim]
-            auto proj = std::dynamic_pointer_cast<Conv3d>(blocks["proj"]);
             x = ggml_reshape_4d(ctx,
                                 x,
                                 patch_size,
                                 patch_size,
                                 temporal_patch_size,
                                 ggml_nelements(x) / (temporal_patch_size * patch_size * patch_size));
+            if (llama_cpp_style) {
+                auto proj_0 = std::dynamic_pointer_cast<Conv2d>(blocks["proj.0"]);
+                auto proj_1 = std::dynamic_pointer_cast<Conv2d>(blocks["proj.1"]);
+                auto x0 = ggml_slice(ctx, x, 2, 0, 1);
+                x0 = ggml_reshape_4d(ctx, x0, x0->ne[0], x0->ne[1], in_channels, x0->ne[3] / in_channels);
+                x0 = proj_0->forward(ctx, x0);
+                auto x1 = ggml_slice(ctx, x, 2, 1, 2);
+                x1 = ggml_reshape_4d(ctx, x1, x1->ne[0], x1->ne[1], in_channels, x1->ne[3] / in_channels);
+                x1 = proj_1->forward(ctx, x1);
+                x = ggml_add(ctx, x0, x1);
+            } else {
+                auto proj = std::dynamic_pointer_cast<Conv3d>(blocks["proj"]);
                 x = proj->forward(ctx, x);
+            }
             x = ggml_reshape_2d(ctx, x, embed_dim, ggml_nelements(x) / embed_dim);
             return x;
         }
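The llama_cpp_style branch above leans on a small identity: with kernel and stride both (temporal_patch_size, patch_size, patch_size) and no padding, a Conv3d over exactly two temporal frames equals the sum of two per-frame Conv2d applications, one per temporal slice of the 3D kernel. A sketch of the equivalence the code relies on (no bias in either form):

    // For temporal_patch_size == 2:
    //   conv3d(x, W) == conv2d(x[:, 0], W[:, :, 0]) + conv2d(x[:, 1], W[:, :, 1])
    // proj.0 and proj.1 hold the two temporal slices of W, which is why the
    // branch slices dim 2 (time), projects each frame, and adds the results.

This matches the name map earlier in this commit, which routes v.patch_embd.weight and v.patch_embd.weight.1 to patch_embed.proj.0.weight and patch_embed.proj.1.weight.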
@@ -431,16 +472,24 @@ namespace Qwen {
     struct Qwen2_5_VLVisionAttention : public GGMLBlock {
     protected:
+        bool llama_cpp_style;
         int64_t head_dim;
         int64_t num_heads;

     public:
-        Qwen2_5_VLVisionAttention(int64_t hidden_size,
+        Qwen2_5_VLVisionAttention(bool llama_cpp_style,
+                                  int64_t hidden_size,
                                   int64_t num_heads)
-            : num_heads(num_heads) {
+            : llama_cpp_style(llama_cpp_style), num_heads(num_heads) {
             head_dim = hidden_size / num_heads;
             GGML_ASSERT(num_heads * head_dim == hidden_size);
+            if (llama_cpp_style) {
+                blocks["q_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size));
+                blocks["k_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size));
+                blocks["v_proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size));
+            } else {
                 blocks["qkv"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size * 3));
+            }
             blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size));
         }
@@ -452,11 +501,25 @@ namespace Qwen {
             // x: [N, n_token, hidden_size]
             int64_t n_token = x->ne[1];
             int64_t N = x->ne[2];
-            auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
             auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);

+            std::vector<ggml_tensor*> qkv_vec;
+            if (llama_cpp_style) {
+                auto q_proj = std::dynamic_pointer_cast<Linear>(blocks["q_proj"]);
+                auto k_proj = std::dynamic_pointer_cast<Linear>(blocks["k_proj"]);
+                auto v_proj = std::dynamic_pointer_cast<Linear>(blocks["v_proj"]);
+                auto q = q_proj->forward(ctx, x);
+                auto k = k_proj->forward(ctx, x);
+                auto v = v_proj->forward(ctx, x);
+                qkv_vec = {q, k, v};
+            } else {
+                auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
                 auto qkv = qkv_proj->forward(ctx, x);
-            auto qkv_vec = split_qkv(ctx, qkv);
+                qkv_vec = split_qkv(ctx, qkv);
+            }
             auto q = ggml_reshape_4d(ctx, qkv_vec[0], head_dim, num_heads, qkv_vec[0]->ne[1], qkv_vec[0]->ne[2]);  // [N, n_token, n_head, d_head]
             auto k = ggml_reshape_4d(ctx, qkv_vec[1], head_dim, num_heads, qkv_vec[1]->ne[1], qkv_vec[1]->ne[2]);  // [N, n_token, n_head, d_head]
             auto v = ggml_reshape_4d(ctx, qkv_vec[2], head_dim, num_heads, qkv_vec[2]->ne[1], qkv_vec[2]->ne[2]);  // [N, n_token, n_head, d_head]
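Both branches yield the same q/k/v tensors: HF-style checkpoints store one fused qkv projection of width 3*hidden_size, while llama.cpp mmproj files keep three separate hidden_size-wide projections. Conceptually (weights viewed as [out, in] matrices):

    // W_qkv == concat(W_q, W_k, W_v) along the output dimension, so
    //   qkv_proj(x) == concat(q_proj(x), k_proj(x), v_proj(x)),
    // and split_qkv() undoes that concatenation. qkv_vec therefore has the
    // same layout in either branch, and the reshapes above are shared.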
@@ -470,11 +533,12 @@ namespace Qwen {
     struct Qwen2_5_VLVisionBlock : public GGMLBlock {
     public:
-        Qwen2_5_VLVisionBlock(int64_t hidden_size,
+        Qwen2_5_VLVisionBlock(bool llama_cpp_style,
+                              int64_t hidden_size,
                               int64_t intermediate_size,
                               int64_t num_heads,
                               float eps = 1e-6f) {
-            blocks["attn"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLVisionAttention(hidden_size, num_heads));
+            blocks["attn"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLVisionAttention(llama_cpp_style, hidden_size, num_heads));
             blocks["mlp"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLMLP(hidden_size, intermediate_size, true));
             blocks["norm1"] = std::shared_ptr<GGMLBlock>(new RMSNorm(hidden_size, eps));
             blocks["norm2"] = std::shared_ptr<GGMLBlock>(new RMSNorm(hidden_size, eps));
@@ -512,7 +576,8 @@ namespace Qwen {
         std::set<int> fullatt_block_indexes;

     public:
-        Qwen2_5_VLVisionModel(int64_t num_layers,
+        Qwen2_5_VLVisionModel(bool llama_cpp_style,
+                              int64_t num_layers,
                               int64_t in_channels,
                               int64_t hidden_size,
                               int64_t out_hidden_size,
@@ -525,9 +590,14 @@ namespace Qwen {
                              std::set<int> fullatt_block_indexes = {7, 15, 23, 31},
                              float eps = 1e-6f)
            : num_layers(num_layers), fullatt_block_indexes(fullatt_block_indexes), spatial_merge_size(spatial_merge_size) {
-            blocks["patch_embed"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VisionPatchEmbed(patch_size, temporal_patch_size, in_channels, hidden_size));
+            blocks["patch_embed"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VisionPatchEmbed(llama_cpp_style,
+                                                                                            patch_size,
+                                                                                            temporal_patch_size,
+                                                                                            in_channels,
+                                                                                            hidden_size));
             for (int i = 0; i < num_layers; i++) {
-                blocks["blocks." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLVisionBlock(hidden_size,
+                blocks["blocks." + std::to_string(i)] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLVisionBlock(llama_cpp_style,
+                                                                                                              hidden_size,
                                                                                                               intermediate_size,
                                                                                                               num_heads,
                                                                                                               eps));
@@ -783,7 +853,7 @@ namespace Qwen {
     public:
         Qwen2_5_VL() {}

-        Qwen2_5_VL(Qwen2_5_VLParams params, bool enable_vision = false)
+        Qwen2_5_VL(Qwen2_5_VLParams params, bool enable_vision = false, bool llama_cpp_style = false)
             : enable_vision(enable_vision), params(params) {
             blocks["model"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLTextModel(params.num_layers,
                                                                                  params.vocab_size,
@@ -793,7 +863,8 @@ namespace Qwen {
                                                                                 params.num_kv_heads,
                                                                                 params.rms_norm_eps));
             if (enable_vision) {
-                blocks["visual"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLVisionModel(params.vision.num_layers,
+                blocks["visual"] = std::shared_ptr<GGMLBlock>(new Qwen2_5_VLVisionModel(llama_cpp_style,
+                                                                                        params.vision.num_layers,
                                                                                         params.vision.in_channels,
                                                                                         params.vision.hidden_size,
                                                                                         params.vision.out_hidden_size,
@@ -850,6 +921,7 @@ namespace Qwen {
                         bool enable_vision_ = false)
             : GGMLRunner(backend, offload_params_to_cpu), enable_vision(enable_vision_) {
             bool have_vision_weight = false;
+            bool llama_cpp_style = false;
             for (auto pair : tensor_types) {
                 std::string tensor_name = pair.first;
                 if (tensor_name.find(prefix) == std::string::npos)
@@ -857,17 +929,23 @@ namespace Qwen {
                 size_t pos = tensor_name.find("visual.");
                 if (pos != std::string::npos) {
                     have_vision_weight = true;
+                    if (contains(tensor_name, "attn.q_proj")) {
+                        llama_cpp_style = true;
                         break;
                     }
                 }
+            }
             if (enable_vision && !have_vision_weight) {
                 LOG_WARN("no vision weights detected, vision disabled");
                 enable_vision = false;
             }
             if (enable_vision) {
                 LOG_DEBUG("enable qwen2vl vision");
+                if (llama_cpp_style) {
+                    LOG_DEBUG("llama.cpp style vision weight");
+                }
             }
-            model = Qwen2_5_VL(params, enable_vision);
+            model = Qwen2_5_VL(params, enable_vision, llama_cpp_style);
             model.init(params_ctx, tensor_types, prefix);
         }
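The detection loop relies on a naming invariant: after convert_cond_model_name has run, only llama.cpp-exported vision weights contain per-projection attention tensors (the map turns "attn_q." into "attn.q_proj."), while HF-style checkpoints keep the fused "attn.qkv" name, so a single substring test picks the layout. Illustrative tensor names (assumed, for clarity):

    // ...visual.blocks.0.attn.q_proj.weight -> llama.cpp-style mmproj: split q/k/v
    // ...visual.blocks.0.attn.qkv.weight    -> HF-style checkpoint: fused qkv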

File 4 of 5

@@ -259,6 +259,13 @@ public:
             }
         }

+        if (strlen(SAFE_STR(sd_ctx_params->qwen2vl_vision_path)) > 0) {
+            LOG_INFO("loading qwen2vl vision from '%s'", sd_ctx_params->qwen2vl_vision_path);
+            if (!model_loader.init_from_file(sd_ctx_params->qwen2vl_vision_path, "text_encoders.qwen2vl.visual.")) {
+                LOG_WARN("loading qwen2vl vision from '%s' failed", sd_ctx_params->qwen2vl_vision_path);
+            }
+        }
+
         if (strlen(SAFE_STR(sd_ctx_params->vae_path)) > 0) {
             LOG_INFO("loading vae from '%s'", sd_ctx_params->vae_path);
             if (!model_loader.init_from_file(sd_ctx_params->vae_path, "vae.")) {
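Loading the standalone mmproj file under the "text_encoders.qwen2vl.visual." prefix re-roots its tensors into the same namespace a combined qwen2vl checkpoint would use, so the vision-weight detection in the runner works regardless of which file the tensors came from; on failure only a warning is logged and generation proceeds without vision input. Presumably a raw mmproj name flows like this (illustrative):

    // "v.blk.0.attn_q.weight"
    //   -> prefixed:  "text_encoders.qwen2vl.visual.v.blk.0.attn_q.weight"
    //   -> converted: "text_encoders.qwen2vl.visual.blocks.0.attn.q_proj.weight"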
@@ -1757,6 +1764,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
              "clip_vision_path: %s\n"
              "t5xxl_path: %s\n"
              "qwen2vl_path: %s\n"
+             "qwen2vl_vision_path: %s\n"
              "diffusion_model_path: %s\n"
              "high_noise_diffusion_model_path: %s\n"
              "vae_path: %s\n"
@@ -1785,6 +1793,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
              SAFE_STR(sd_ctx_params->clip_vision_path),
              SAFE_STR(sd_ctx_params->t5xxl_path),
              SAFE_STR(sd_ctx_params->qwen2vl_path),
+             SAFE_STR(sd_ctx_params->qwen2vl_vision_path),
              SAFE_STR(sd_ctx_params->diffusion_model_path),
              SAFE_STR(sd_ctx_params->high_noise_diffusion_model_path),
              SAFE_STR(sd_ctx_params->vae_path),

File 5 of 5

@@ -132,6 +132,7 @@ typedef struct {
     const char* clip_vision_path;
     const char* t5xxl_path;
     const char* qwen2vl_path;
+    const char* qwen2vl_vision_path;
    const char* diffusion_model_path;
    const char* high_noise_diffusion_model_path;
    const char* vae_path;