Mirror of https://github.com/leejet/stable-diffusion.cpp.git, synced 2025-12-12 21:38:58 +00:00

Compare commits: master-407...master (3 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 11ab095230 | |
| | a3a88fc9b2 | |
| | 8823dc48bc | |
@@ -60,7 +60,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
    int32_t num_custom_embeddings   = 0;
    int32_t num_custom_embeddings_2 = 0;
    std::vector<uint8_t> token_embed_custom;
    std::vector<std::string> readed_embeddings;
    std::map<std::string, std::pair<int, int>> embedding_pos_map;

    FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
                                      bool offload_params_to_cpu,
@@ -123,14 +123,17 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
    }

    bool load_embedding(std::string embd_name, std::string embd_path, std::vector<int32_t>& bpe_tokens) {
        // the order matters
        ModelLoader model_loader;
        if (!model_loader.init_from_file_and_convert_name(embd_path)) {
            LOG_ERROR("embedding '%s' failed", embd_name.c_str());
            return false;
        }
        if (std::find(readed_embeddings.begin(), readed_embeddings.end(), embd_name) != readed_embeddings.end()) {
            auto iter = embedding_pos_map.find(embd_name);
            if (iter != embedding_pos_map.end()) {
                LOG_DEBUG("embedding already read in: %s", embd_name.c_str());
                for (int i = iter->second.first; i < iter->second.second; i++) {
                    bpe_tokens.push_back(text_model->model.vocab_size + i);
                }
                return true;
            }
        }
        struct ggml_init_params params;
@@ -161,7 +164,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
            return true;
        };
        model_loader.load_tensors(on_load, 1);
        readed_embeddings.push_back(embd_name);
        int pos_start = num_custom_embeddings;
        if (embd) {
            int64_t hidden_size = text_model->model.hidden_size;
            token_embed_custom.resize(token_embed_custom.size() + ggml_nbytes(embd));
@@ -188,6 +191,11 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
            }
            LOG_DEBUG("embedding '%s' applied, custom embeddings: %i (text model 2)", embd_name.c_str(), num_custom_embeddings_2);
        }
        int pos_end = num_custom_embeddings;
        if (pos_end == pos_start) {
            return false;
        }
        embedding_pos_map[embd_name] = std::pair{pos_start, pos_end};
        return true;
    }
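For illustration, here is a minimal self-contained sketch of the caching pattern this hunk introduces (hypothetical EmbeddingCache type and get_tokens helper, independent of the real ModelLoader/CLIP plumbing): each embedding name maps to a half-open [start, end) range of custom token slots appended after the normal vocabulary, so a prompt that references the same embedding again reuses the ids instead of loading the tensor twice.

// Hypothetical, self-contained sketch of the embedding_pos_map idea.
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct EmbeddingCache {
    int32_t vocab_size            = 49408;  // size of the base tokenizer vocabulary (illustrative)
    int32_t num_custom_embeddings = 0;      // custom slots appended so far
    std::map<std::string, std::pair<int, int>> embedding_pos_map;  // name -> [start, end)

    // Return the token ids for `name`, registering `n_vectors` new slots on first use.
    std::vector<int32_t> get_tokens(const std::string& name, int n_vectors) {
        auto iter = embedding_pos_map.find(name);
        if (iter == embedding_pos_map.end()) {
            int start = num_custom_embeddings;
            num_custom_embeddings += n_vectors;
            iter = embedding_pos_map.emplace(name, std::make_pair(start, num_custom_embeddings)).first;
        }
        std::vector<int32_t> tokens;
        for (int i = iter->second.first; i < iter->second.second; i++) {
            tokens.push_back(vocab_size + i);  // custom ids live just past the normal vocab
        }
        return tokens;
    }
};

Calling get_tokens("my_style", 4) twice would return the same four ids, mirroring the early-return branch in load_embedding above.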
@@ -60,6 +60,14 @@
#define SD_UNUSED(x) (void)(x)
#endif

__STATIC_INLINE__ int align_up_offset(int n, int multiple) {
    return (multiple - n % multiple) % multiple;
}

__STATIC_INLINE__ int align_up(int n, int multiple) {
    return n + align_up_offset(n, multiple);
}

__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void*) {
    switch (level) {
        case GGML_LOG_LEVEL_DEBUG:
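The new helpers are plain integer arithmetic: align_up_offset returns how far n is below the next multiple (0 if already aligned) and align_up rounds n up to it. A small standalone check (same formulas, wrapped in a hypothetical main):

// Hypothetical standalone check of the two helpers above.
#include <cassert>

static int align_up_offset(int n, int multiple) {
    return (multiple - n % multiple) % multiple;  // 0 when n is already aligned
}

static int align_up(int n, int multiple) {
    return n + align_up_offset(n, multiple);      // round n up to the next multiple
}

int main() {
    assert(align_up_offset(1024, 64) == 0);  // already a multiple of 64
    assert(align_up(1000, 64) == 1024);      // 1000 -> 1024
    assert(align_up(600, 64) == 640);        // 600  -> 640
    assert(align_up(1, 16) == 16);           // 1    -> 16
    return 0;
}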
@@ -1392,10 +1400,14 @@ __STATIC_INLINE__ void ggml_ext_backend_tensor_get_and_sync(ggml_backend_t backe
}

__STATIC_INLINE__ float ggml_ext_backend_tensor_get_f32(ggml_tensor* tensor) {
    GGML_ASSERT(tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_I32);
    GGML_ASSERT(tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_I32 || tensor->type == GGML_TYPE_BF16);
    float value;
    if (tensor->type == GGML_TYPE_F32) {
        ggml_backend_tensor_get(tensor, &value, 0, sizeof(value));
    } else if (tensor->type == GGML_TYPE_BF16) {
        ggml_bf16_t bf16_value;
        ggml_backend_tensor_get(tensor, &bf16_value, 0, sizeof(bf16_value));
        value = ggml_bf16_to_fp32(bf16_value);
    } else if (tensor->type == GGML_TYPE_F16) {
        ggml_fp16_t f16_value;
        ggml_backend_tensor_get(tensor, &f16_value, 0, sizeof(f16_value));
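bfloat16 stores the top 16 bits of an IEEE-754 float32, which is why reading two bytes from the backend and converting with ggml_bf16_to_fp32 is sufficient. The sketch below illustrates that conversion on raw bits; it is not the ggml implementation itself:

// Hypothetical illustration: widen a raw bf16 bit pattern into a float32.
#include <cstdint>
#include <cstdio>
#include <cstring>

static float bf16_bits_to_float(uint16_t bits) {
    uint32_t u = (uint32_t)bits << 16;  // bf16 occupies the high 16 bits of a float32
    float f;
    std::memcpy(&f, &u, sizeof(f));
    return f;
}

int main() {
    // 0x3F80 is the bf16 pattern for 1.0f (sign 0, exponent 127, mantissa 0).
    std::printf("%f\n", bf16_bits_to_float(0x3F80));  // prints 1.000000
    return 0;
}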
@@ -1898,6 +1898,18 @@ public:
        return vae_scale_factor;
    }

    int get_diffusion_model_down_factor() {
        int down_factor = 8;  // unet
        if (sd_version_is_dit(version)) {
            if (sd_version_is_wan(version)) {
                down_factor = 2;
            } else {
                down_factor = 1;
            }
        }
        return down_factor;
    }

    int get_latent_channel() {
        int latent_channel = 4;
        if (sd_version_is_dit(version)) {
@@ -3133,22 +3145,19 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
    sd_ctx->sd->vae_tiling_params = sd_img_gen_params->vae_tiling_params;
    int width                     = sd_img_gen_params->width;
    int height                    = sd_img_gen_params->height;

    int vae_scale_factor = sd_ctx->sd->get_vae_scale_factor();
    if (sd_version_is_dit(sd_ctx->sd->version)) {
        if (width % 16 || height % 16) {
            LOG_ERROR("Image dimensions must be must be a multiple of 16 on each axis for %s models. (Got %dx%d)",
                      model_version_to_str[sd_ctx->sd->version],
                      width,
                      height);
            return nullptr;
        }
    } else if (width % 64 || height % 64) {
        LOG_ERROR("Image dimensions must be must be a multiple of 64 on each axis for %s models. (Got %dx%d)",
                  model_version_to_str[sd_ctx->sd->version],
                  width,
                  height);
        return nullptr;
    int diffusion_model_down_factor = sd_ctx->sd->get_diffusion_model_down_factor();
    int spatial_multiple            = vae_scale_factor * diffusion_model_down_factor;

    int width_offset  = align_up_offset(width, spatial_multiple);
    int height_offset = align_up_offset(height, spatial_multiple);
    if (width_offset > 0 || height_offset > 0) {
        width += width_offset;
        height += height_offset;
        LOG_WARN("align up %dx%d to %dx%d (multiple=%d)", sd_img_gen_params->width, sd_img_gen_params->height, width, height, spatial_multiple);
    }

    LOG_DEBUG("generate_image %dx%d", width, height);
    if (sd_ctx == nullptr || sd_img_gen_params == nullptr) {
        return nullptr;
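Instead of rejecting off-grid sizes, generate_image now rounds them up to the nearest valid multiple and warns. A hedged worked example, assuming a spatial VAE scale factor of 8 (the real factors come from get_vae_scale_factor() and get_diffusion_model_down_factor()):

// Hypothetical values; the real factors depend on the model version.
#include <cstdio>

static int align_up_offset(int n, int multiple) {
    return (multiple - n % multiple) % multiple;
}

static void align_request(int width, int height, int spatial_multiple) {
    int w = width + align_up_offset(width, spatial_multiple);
    int h = height + align_up_offset(height, spatial_multiple);
    if (w != width || h != height) {
        std::printf("align up %dx%d to %dx%d (multiple=%d)\n", width, height, w, h, spatial_multiple);
    } else {
        std::printf("%dx%d already aligned (multiple=%d)\n", width, height, spatial_multiple);
    }
}

int main() {
    align_request(1000, 600, 8 * 8);  // UNet-style multiple of 64 -> 1024x640
    align_request(1000, 600, 8 * 1);  // DiT-style multiple of 8   -> unchanged
    return 0;
}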
@@ -3422,9 +3431,19 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
    int frames       = sd_vid_gen_params->video_frames;
    frames           = (frames - 1) / 4 * 4 + 1;
    int sample_steps = sd_vid_gen_params->sample_params.sample_steps;
    LOG_INFO("generate_video %dx%dx%d", width, height, frames);

    int vae_scale_factor            = sd_ctx->sd->get_vae_scale_factor();
    int diffusion_model_down_factor = sd_ctx->sd->get_diffusion_model_down_factor();
    int spatial_multiple            = vae_scale_factor * diffusion_model_down_factor;

    int width_offset  = align_up_offset(width, spatial_multiple);
    int height_offset = align_up_offset(height, spatial_multiple);
    if (width_offset > 0 || height_offset > 0) {
        width += width_offset;
        height += height_offset;
        LOG_WARN("align up %dx%d to %dx%d (multiple=%d)", sd_vid_gen_params->width, sd_vid_gen_params->height, width, height, spatial_multiple);
    }
    LOG_INFO("generate_video %dx%dx%d", width, height, frames);

    enum sample_method_t sample_method = sd_vid_gen_params->sample_params.sample_method;
    if (sample_method == SAMPLE_METHOD_COUNT) {
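The frame count is snapped down to the form 4k + 1 before the same spatial alignment is applied; a quick illustration of that integer arithmetic with hypothetical requested counts:

// Hypothetical requested frame counts; shows only the 4k + 1 snapping.
#include <cstdio>
#include <initializer_list>

int main() {
    for (int requested : {1, 8, 13, 16, 33}) {
        int frames = (requested - 1) / 4 * 4 + 1;  // snap down to the nearest 4k + 1
        std::printf("%d -> %d\n", requested, frames);
    }
    // prints: 1 -> 1, 8 -> 5, 13 -> 13, 16 -> 13, 33 -> 33
    return 0;
}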