Compare commits

...

6 Commits

Author SHA1 Message Date
vmobilis
10c6501bd0
fix missing argument in prototype of stbi_write_jpg (#613) 2025-03-09 12:30:10 +08:00
vmobilis
10feacf031
fix: correct img2img time (#616) 2025-03-09 12:29:08 +08:00
vmobilis
655f8a5169
fix: clang complains about needless braces (#618) 2025-03-09 12:26:41 +08:00
idostyle
d7c7a34712
fix: ModelLoader::load_tensors duplicated check (#623)
Introduced in 2b6ec97fe244d03c40aa8d70131d40bb086099b0
2025-03-09 12:23:23 +08:00
vmobilis
81556f3136
chore: silence some warnings about precision loss (#620) 2025-03-09 12:22:39 +08:00
stduhpf
3fb275a67b
fix: support sdxl embeddings (#621) 2025-03-09 12:21:23 +08:00
6 changed files with 64 additions and 39 deletions

View File

@ -51,7 +51,8 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
std::string trigger_word = "img"; // should be user settable std::string trigger_word = "img"; // should be user settable
std::string embd_dir; std::string embd_dir;
int32_t num_custom_embeddings = 0; int32_t num_custom_embeddings = 0;
int32_t num_custom_embeddings_2 = 0;
std::vector<uint8_t> token_embed_custom; std::vector<uint8_t> token_embed_custom;
std::vector<std::string> readed_embeddings; std::vector<std::string> readed_embeddings;
@ -131,28 +132,55 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
params.no_alloc = false; params.no_alloc = false;
struct ggml_context* embd_ctx = ggml_init(params); struct ggml_context* embd_ctx = ggml_init(params);
struct ggml_tensor* embd = NULL; struct ggml_tensor* embd = NULL;
int64_t hidden_size = text_model->model.hidden_size; struct ggml_tensor* embd2 = NULL;
auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) { auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) {
if (tensor_storage.ne[0] != hidden_size) { if (tensor_storage.ne[0] != text_model->model.hidden_size) {
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], hidden_size); if (text_model2) {
return false; if (tensor_storage.ne[0] == text_model2->model.hidden_size) {
embd2 = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, text_model2->model.hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
*dst_tensor = embd2;
} else {
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i or %i", tensor_storage.ne[0], text_model->model.hidden_size, text_model2->model.hidden_size);
return false;
}
} else {
LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], text_model->model.hidden_size);
return false;
}
} else {
embd = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, text_model->model.hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
*dst_tensor = embd;
} }
embd = ggml_new_tensor_2d(embd_ctx, tensor_storage.type, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1);
*dst_tensor = embd;
return true; return true;
}; };
model_loader.load_tensors(on_load, NULL); model_loader.load_tensors(on_load, NULL);
readed_embeddings.push_back(embd_name); readed_embeddings.push_back(embd_name);
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd)); if (embd) {
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)), int64_t hidden_size = text_model->model.hidden_size;
embd->data, token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
ggml_nbytes(embd)); memcpy((void*)(token_embed_custom.data() + num_custom_embeddings * hidden_size * ggml_type_size(embd->type)),
for (int i = 0; i < embd->ne[1]; i++) { embd->data,
bpe_tokens.push_back(text_model->model.vocab_size + num_custom_embeddings); ggml_nbytes(embd));
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings); for (int i = 0; i < embd->ne[1]; i++) {
num_custom_embeddings++; bpe_tokens.push_back(text_model->model.vocab_size + num_custom_embeddings);
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
num_custom_embeddings++;
}
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings);
}
if (embd2) {
int64_t hidden_size = text_model2->model.hidden_size;
token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd2));
memcpy((void*)(token_embed_custom.data() + num_custom_embeddings_2 * hidden_size * ggml_type_size(embd2->type)),
embd2->data,
ggml_nbytes(embd2));
for (int i = 0; i < embd2->ne[1]; i++) {
bpe_tokens.push_back(text_model2->model.vocab_size + num_custom_embeddings_2);
// LOG_DEBUG("new custom token: %i", text_model.vocab_size + num_custom_embeddings);
num_custom_embeddings_2++;
}
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i (text model 2)", embd_name.c_str(), num_custom_embeddings_2);
} }
LOG_DEBUG("embedding '%s' applied, custom embeddings: %i", embd_name.c_str(), num_custom_embeddings);
return true; return true;
} }

View File

@ -126,9 +126,9 @@ struct SDParams {
int upscale_repeats = 1; int upscale_repeats = 1;
std::vector<int> skip_layers = {7, 8, 9}; std::vector<int> skip_layers = {7, 8, 9};
float slg_scale = 0.; float slg_scale = 0.f;
float skip_layer_start = 0.01; float skip_layer_start = 0.01f;
float skip_layer_end = 0.2; float skip_layer_end = 0.2f;
}; };
void print_params(SDParams params) { void print_params(SDParams params) {

View File

@ -329,21 +329,21 @@ const std::vector<std::vector<float>> GITS_NOISE_1_50 = {
}; };
const std::vector<const std::vector<std::vector<float>>*> GITS_NOISE = { const std::vector<const std::vector<std::vector<float>>*> GITS_NOISE = {
{ &GITS_NOISE_0_80 }, &GITS_NOISE_0_80,
{ &GITS_NOISE_0_85 }, &GITS_NOISE_0_85,
{ &GITS_NOISE_0_90 }, &GITS_NOISE_0_90,
{ &GITS_NOISE_0_95 }, &GITS_NOISE_0_95,
{ &GITS_NOISE_1_00 }, &GITS_NOISE_1_00,
{ &GITS_NOISE_1_05 }, &GITS_NOISE_1_05,
{ &GITS_NOISE_1_10 }, &GITS_NOISE_1_10,
{ &GITS_NOISE_1_15 }, &GITS_NOISE_1_15,
{ &GITS_NOISE_1_20 }, &GITS_NOISE_1_20,
{ &GITS_NOISE_1_25 }, &GITS_NOISE_1_25,
{ &GITS_NOISE_1_30 }, &GITS_NOISE_1_30,
{ &GITS_NOISE_1_35 }, &GITS_NOISE_1_35,
{ &GITS_NOISE_1_40 }, &GITS_NOISE_1_40,
{ &GITS_NOISE_1_45 }, &GITS_NOISE_1_45,
{ &GITS_NOISE_1_50 } &GITS_NOISE_1_50
}; };
#endif // GITS_NOISE_INL #endif // GITS_NOISE_INL

View File

@ -1929,9 +1929,6 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) { if (pair.first.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos) {
continue; continue;
} }
if (pair.first.find("alphas_cumprod") != std::string::npos) {
continue;
}
if (pair.first.find("alphas_cumprod") != std::string::npos) { if (pair.first.find("alphas_cumprod") != std::string::npos) {
continue; continue;

View File

@ -1806,7 +1806,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx,
size_t t2 = ggml_time_ms(); size_t t2 = ggml_time_ms();
LOG_INFO("img2img completed in %.2fs", (t1 - t0) * 1.0f / 1000); LOG_INFO("img2img completed in %.2fs", (t2 - t0) * 1.0f / 1000);
return result_images; return result_images;
} }

View File

@ -177,7 +177,7 @@ STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const
STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality, const char* parameters = NULL);
#ifdef STBIW_WINDOWS_UTF8 #ifdef STBIW_WINDOWS_UTF8
STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);