mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-12 21:38:58 +00:00
fix: allow resetting clip_skip to its default value (#697)
This commit is contained in:
parent
d6c87dce5c
commit
b1fc16b504
12
clip.hpp
12
clip.hpp
@ -678,8 +678,8 @@ public:
|
|||||||
bool with_final_ln = true;
|
bool with_final_ln = true;
|
||||||
|
|
||||||
CLIPTextModel(CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
CLIPTextModel(CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||||
int clip_skip_value = -1,
|
bool with_final_ln = true,
|
||||||
bool with_final_ln = true)
|
int clip_skip_value = -1)
|
||||||
: version(version), with_final_ln(with_final_ln) {
|
: version(version), with_final_ln(with_final_ln) {
|
||||||
if (version == OPEN_CLIP_VIT_H_14) {
|
if (version == OPEN_CLIP_VIT_H_14) {
|
||||||
hidden_size = 1024;
|
hidden_size = 1024;
|
||||||
@ -701,7 +701,7 @@ public:
|
|||||||
|
|
||||||
void set_clip_skip(int skip) {
|
void set_clip_skip(int skip) {
|
||||||
if (skip <= 0) {
|
if (skip <= 0) {
|
||||||
return;
|
skip = -1;
|
||||||
}
|
}
|
||||||
clip_skip = skip;
|
clip_skip = skip;
|
||||||
}
|
}
|
||||||
@ -871,9 +871,9 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||||||
std::map<std::string, enum ggml_type>& tensor_types,
|
std::map<std::string, enum ggml_type>& tensor_types,
|
||||||
const std::string prefix,
|
const std::string prefix,
|
||||||
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
CLIPVersion version = OPENAI_CLIP_VIT_L_14,
|
||||||
int clip_skip_value = 1,
|
bool with_final_ln = true,
|
||||||
bool with_final_ln = true)
|
int clip_skip_value = -1)
|
||||||
: GGMLRunner(backend), model(version, clip_skip_value, with_final_ln) {
|
: GGMLRunner(backend), model(version, with_final_ln, clip_skip_value) {
|
||||||
model.init(params_ctx, tensor_types, prefix);
|
model.init(params_ctx, tensor_types, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -63,23 +63,24 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
|
|||||||
PMVersion pv = PM_VERSION_1,
|
PMVersion pv = PM_VERSION_1,
|
||||||
int clip_skip = -1)
|
int clip_skip = -1)
|
||||||
: version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407), embd_dir(embd_dir) {
|
: version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407), embd_dir(embd_dir) {
|
||||||
|
if (sd_version_is_sd1(version)) {
|
||||||
|
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14);
|
||||||
|
} else if (sd_version_is_sd2(version)) {
|
||||||
|
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14);
|
||||||
|
} else if (sd_version_is_sdxl(version)) {
|
||||||
|
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
||||||
|
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
||||||
|
}
|
||||||
|
set_clip_skip(clip_skip);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_clip_skip(int clip_skip) {
|
||||||
if (clip_skip <= 0) {
|
if (clip_skip <= 0) {
|
||||||
clip_skip = 1;
|
clip_skip = 1;
|
||||||
if (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) {
|
if (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) {
|
||||||
clip_skip = 2;
|
clip_skip = 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sd_version_is_sd1(version)) {
|
|
||||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip);
|
|
||||||
} else if (sd_version_is_sd2(version)) {
|
|
||||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, clip_skip);
|
|
||||||
} else if (sd_version_is_sdxl(version)) {
|
|
||||||
text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
|
|
||||||
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_clip_skip(int clip_skip) {
|
|
||||||
text_model->set_clip_skip(clip_skip);
|
text_model->set_clip_skip(clip_skip);
|
||||||
if (sd_version_is_sdxl(version)) {
|
if (sd_version_is_sdxl(version)) {
|
||||||
text_model2->set_clip_skip(clip_skip);
|
text_model2->set_clip_skip(clip_skip);
|
||||||
@ -665,15 +666,16 @@ struct SD3CLIPEmbedder : public Conditioner {
|
|||||||
std::map<std::string, enum ggml_type>& tensor_types,
|
std::map<std::string, enum ggml_type>& tensor_types,
|
||||||
int clip_skip = -1)
|
int clip_skip = -1)
|
||||||
: clip_g_tokenizer(0) {
|
: clip_g_tokenizer(0) {
|
||||||
if (clip_skip <= 0) {
|
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
|
||||||
clip_skip = 2;
|
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
|
||||||
}
|
|
||||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
|
|
||||||
clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
|
|
||||||
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
||||||
|
set_clip_skip(clip_skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_clip_skip(int clip_skip) {
|
void set_clip_skip(int clip_skip) {
|
||||||
|
if (clip_skip <= 0) {
|
||||||
|
clip_skip = 2;
|
||||||
|
}
|
||||||
clip_l->set_clip_skip(clip_skip);
|
clip_l->set_clip_skip(clip_skip);
|
||||||
clip_g->set_clip_skip(clip_skip);
|
clip_g->set_clip_skip(clip_skip);
|
||||||
}
|
}
|
||||||
@ -1010,14 +1012,15 @@ struct FluxCLIPEmbedder : public Conditioner {
|
|||||||
FluxCLIPEmbedder(ggml_backend_t backend,
|
FluxCLIPEmbedder(ggml_backend_t backend,
|
||||||
std::map<std::string, enum ggml_type>& tensor_types,
|
std::map<std::string, enum ggml_type>& tensor_types,
|
||||||
int clip_skip = -1) {
|
int clip_skip = -1) {
|
||||||
if (clip_skip <= 0) {
|
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
|
||||||
clip_skip = 2;
|
|
||||||
}
|
|
||||||
clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, true);
|
|
||||||
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
|
||||||
|
set_clip_skip(clip_skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_clip_skip(int clip_skip) {
|
void set_clip_skip(int clip_skip) {
|
||||||
|
if (clip_skip <= 0) {
|
||||||
|
clip_skip = 2;
|
||||||
|
}
|
||||||
clip_l->set_clip_skip(clip_skip);
|
clip_l->set_clip_skip(clip_skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1422,4 +1425,4 @@ struct PixArtCLIPEmbedder : public Conditioner {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user