mirror of https://github.com/leejet/stable-diffusion.cpp.git
refactor: optimize the usage of tensor_types
This commit is contained in:
parent 7eb30d00e5
commit f6b9aa1a43
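Every hunk below applies the same mechanical change: tensor-type maps are passed as const String2GGMLType& with a {} default instead of a mutable std::map<std::string, enum ggml_type>&, lookups go through a new get_type() helper, and the static empty_tensor_types members that existed only to give the old non-const reference parameters a default argument are deleted. A minimal standalone sketch of why the new signature makes those statics unnecessary (stand-in enum and hypothetical function names, not upstream code):

#include <map>
#include <string>

enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16 };  // stand-in for ggml's real enum

typedef std::map<std::string, enum ggml_type> String2GGMLType;

void old_init(std::map<std::string, enum ggml_type>& tensor_types) {}  // before: non-const lvalue reference
void new_init(const String2GGMLType& tensor_types = {}) {}             // after: const reference with default

int main() {
    // old_init({});  // would not compile: a non-const lvalue reference cannot
    //                // bind to a temporary, hence the old static empty_tensor_types
    new_init();                             // defaults to an empty map
    new_init({{"weight", GGML_TYPE_F16}});  // a temporary binds to const&
    String2GGMLType types = {{"weight", GGML_TYPE_F32}};
    old_init(types);  // the old signature always required a named lvalue
    return 0;
}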
clip.hpp (24 changed lines)

@@ -545,9 +545,9 @@ protected:
     int64_t vocab_size;
     int64_t num_positions;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type token_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "token_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "token_embedding.weight"] : GGML_TYPE_F32;
-        enum ggml_type position_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        enum ggml_type token_wtype = GGML_TYPE_F32;
+        enum ggml_type position_wtype = GGML_TYPE_F32;
 
         params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
         params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);

@@ -594,10 +594,10 @@ protected:
     int64_t image_size;
     int64_t num_patches;
     int64_t num_positions;
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type patch_wtype = GGML_TYPE_F16; // tensor_types.find(prefix + "patch_embedding.weight") != tensor_types.end() ? tensor_types[prefix + "patch_embedding.weight"] : GGML_TYPE_F16;
-        enum ggml_type class_wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "class_embedding") != tensor_types.end() ? tensor_types[prefix + "class_embedding"] : GGML_TYPE_F32;
-        enum ggml_type position_wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end() ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        enum ggml_type patch_wtype = GGML_TYPE_F16;
+        enum ggml_type class_wtype = GGML_TYPE_F32;
+        enum ggml_type position_wtype = GGML_TYPE_F32;
 
         params["patch_embedding.weight"] = ggml_new_tensor_4d(ctx, patch_wtype, patch_size, patch_size, num_channels, embed_dim);
         params["class_embedding"] = ggml_new_tensor_1d(ctx, class_wtype, embed_dim);

@@ -657,9 +657,9 @@ enum CLIPVersion {
 
 class CLIPTextModel : public GGMLBlock {
 protected:
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         if (version == OPEN_CLIP_VIT_BIGG_14) {
-            enum ggml_type wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "text_projection") != tensor_types.end() ? tensor_types[prefix + "text_projection"] : GGML_TYPE_F32;
+            enum ggml_type wtype = GGML_TYPE_F32;
             params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
         }
     }

@@ -805,8 +805,8 @@ protected:
     int64_t out_features;
     bool transpose_weight;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = tensor_types.find(prefix + "weight") != tensor_types.end() ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
         if (transpose_weight) {
             params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
         } else {

@@ -868,7 +868,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
     CLIPTextModel model;
 
     CLIPTextModelRunner(ggml_backend_t backend,
-                        std::map<std::string, enum ggml_type>& tensor_types,
+                        const String2GGMLType& tensor_types,
                         const std::string prefix,
                         CLIPVersion version = OPENAI_CLIP_VIT_L_14,
                         bool with_final_ln = true,
common.hpp (10 changed lines)

@@ -182,9 +182,9 @@ protected:
     int64_t dim_in;
     int64_t dim_out;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
-        enum ggml_type wtype = (tensor_types.find(prefix + "proj.weight") != tensor_types.end()) ? tensor_types[prefix + "proj.weight"] : GGML_TYPE_F32;
-        enum ggml_type bias_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "proj.bias") != tensor_types.end()) ? tensor_types[prefix + "proj.bias"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
+        enum ggml_type wtype = get_type(prefix + "proj.weight", tensor_types, GGML_TYPE_F32);
+        enum ggml_type bias_wtype = GGML_TYPE_F32;
         params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2);
         params["proj.bias"] = ggml_new_tensor_1d(ctx, bias_wtype, dim_out * 2);
     }

@@ -440,9 +440,9 @@ public:
 
 class AlphaBlender : public GGMLBlock {
 protected:
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
         // Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
-        enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.ypes.find(prefix + "mix_factor") != tensor_types.end()) ? tensor_types[prefix + "mix_factor"] : GGML_TYPE_F32;
+        enum ggml_type wtype = GGML_TYPE_F32;
         params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
     }
 
conditioner.hpp

@@ -57,7 +57,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
     std::vector<std::string> readed_embeddings;
 
     FrozenCLIPEmbedderWithCustomWords(ggml_backend_t backend,
-                                      std::map<std::string, enum ggml_type>& tensor_types,
+                                      const String2GGMLType& tensor_types,
                                       const std::string& embd_dir,
                                       SDVersion version = VERSION_SD1,
                                       PMVersion pv = PM_VERSION_1,

@@ -618,7 +618,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
 struct FrozenCLIPVisionEmbedder : public GGMLRunner {
     CLIPVisionModelProjection vision_model;
 
-    FrozenCLIPVisionEmbedder(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types)
+    FrozenCLIPVisionEmbedder(ggml_backend_t backend, const String2GGMLType& tensor_types = {})
         : vision_model(OPEN_CLIP_VIT_H_14, true), GGMLRunner(backend) {
         vision_model.init(params_ctx, tensor_types, "cond_stage_model.transformer");
     }

@@ -663,8 +663,8 @@ struct SD3CLIPEmbedder : public Conditioner {
     std::shared_ptr<T5Runner> t5;
 
     SD3CLIPEmbedder(ggml_backend_t backend,
-                    std::map<std::string, enum ggml_type>& tensor_types,
+                    const String2GGMLType& tensor_types = {},
                     int clip_skip = -1)
         : clip_g_tokenizer(0) {
         clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
         clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);

@@ -1010,8 +1010,8 @@ struct FluxCLIPEmbedder : public Conditioner {
     size_t chunk_len = 256;
 
     FluxCLIPEmbedder(ggml_backend_t backend,
-                     std::map<std::string, enum ggml_type>& tensor_types,
+                     const String2GGMLType& tensor_types = {},
                      int clip_skip = -1) {
         clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
         set_clip_skip(clip_skip);

@@ -1231,10 +1231,10 @@ struct PixArtCLIPEmbedder : public Conditioner {
     int mask_pad = 1;
 
     PixArtCLIPEmbedder(ggml_backend_t backend,
-                       std::map<std::string, enum ggml_type>& tensor_types,
+                       const String2GGMLType& tensor_types = {},
                        int clip_skip = -1,
                        bool use_mask = false,
                        int mask_pad = 1)
         : use_mask(use_mask), mask_pad(mask_pad) {
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
     }
control.hpp

@@ -317,8 +317,8 @@ struct ControlNet : public GGMLRunner {
     bool guided_hint_cached = false;
 
     ControlNet(ggml_backend_t backend,
-               std::map<std::string, enum ggml_type>& tensor_types,
+               const String2GGMLType& tensor_types = {},
                SDVersion version = VERSION_SD1)
         : GGMLRunner(backend), control_net(version) {
         control_net.init(params_ctx, tensor_types, "");
     }
diffusion_model.hpp

@@ -32,9 +32,9 @@ struct UNetModel : public DiffusionModel {
     UNetModelRunner unet;
 
     UNetModel(ggml_backend_t backend,
-              std::map<std::string, enum ggml_type>& tensor_types,
+              const String2GGMLType& tensor_types = {},
               SDVersion version = VERSION_SD1,
               bool flash_attn = false)
         : unet(backend, tensor_types, "model.diffusion_model", version, flash_attn) {
     }
 
@@ -85,7 +85,7 @@ struct MMDiTModel : public DiffusionModel {
     MMDiTRunner mmdit;
 
     MMDiTModel(ggml_backend_t backend,
-               std::map<std::string, enum ggml_type>& tensor_types)
+               const String2GGMLType& tensor_types = {})
         : mmdit(backend, tensor_types, "model.diffusion_model") {
     }
 
@@ -135,10 +135,10 @@ struct FluxModel : public DiffusionModel {
     Flux::FluxRunner flux;
 
     FluxModel(ggml_backend_t backend,
-              std::map<std::string, enum ggml_type>& tensor_types,
+              const String2GGMLType& tensor_types = {},
              SDVersion version = VERSION_FLUX,
              bool flash_attn = false,
              bool use_mask = false)
         : flux(backend, tensor_types, "model.diffusion_model", version, flash_attn, use_mask) {
     }
 
esrgan.hpp

@@ -142,7 +142,7 @@ struct ESRGAN : public GGMLRunner {
     int scale = 4;
     int tile_size = 128; // avoid cuda OOM for 4gb VRAM
 
-    ESRGAN(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types)
+    ESRGAN(ggml_backend_t backend, const String2GGMLType& tensor_types = {})
         : GGMLRunner(backend) {
         rrdb_net.init(params_ctx, tensor_types, "");
     }
flux.hpp (16 changed lines)

@@ -35,8 +35,8 @@ namespace Flux {
     int64_t hidden_size;
     float eps;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "scale") != tensor_types.end()) ? tensor_types[prefix + "scale"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        ggml_type wtype = GGML_TYPE_F32;
         params["scale"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
     }
 
@@ -1039,8 +1039,6 @@ namespace Flux {
 };
 
 struct FluxRunner : public GGMLRunner {
-    static std::map<std::string, enum ggml_type> empty_tensor_types;
-
 public:
     FluxParams flux_params;
     Flux flux;

@@ -1050,11 +1048,11 @@ namespace Flux {
     bool use_mask = false;
 
     FluxRunner(ggml_backend_t backend,
-               std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
+               const String2GGMLType& tensor_types = {},
                const std::string prefix = "",
                SDVersion version = VERSION_FLUX,
                bool flash_attn = false,
                bool use_mask = false)
         : GGMLRunner(backend), use_mask(use_mask) {
         flux_params.flash_attn = flash_attn;
         flux_params.guidance_embed = false;
ggml_extend.hpp

@@ -841,21 +841,19 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
     float scale = (1.0f / sqrt((float)d_head));
 
     int kv_pad = 0;
-    //if (flash_attn) {
+    // if (flash_attn) {
     //     LOG_DEBUG("attention_ext L_q:%d L_k:%d n_head:%d C:%d d_head:%d N:%d", L_q, L_k, n_head, C, d_head, N);
-    //}
+    // }
     // is there anything oddly shaped?? ping Green-Sky if you can trip this assert
     GGML_ASSERT(((L_k % 256 == 0) && L_q == L_k) || !(L_k % 256 == 0));
 
     bool can_use_flash_attn = true;
-    can_use_flash_attn = can_use_flash_attn && (
-        d_head == 64 ||
-        d_head == 80 ||
-        d_head == 96 ||
-        d_head == 112 ||
-        d_head == 128 ||
-        d_head == 256
-    );
+    can_use_flash_attn = can_use_flash_attn && (d_head == 64 ||
+                                                d_head == 80 ||
+                                                d_head == 96 ||
+                                                d_head == 112 ||
+                                                d_head == 128 ||
+                                                d_head == 256);
 #if 0
     can_use_flash_attn = can_use_flash_attn && L_k % 256 == 0;
 #else

@@ -880,9 +878,9 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention_ext(struct ggml_context*
     ggml_tensor* kqv = nullptr;
     // GGML_ASSERT((flash_attn && can_use_flash_attn) || !flash_attn);
     if (can_use_flash_attn && flash_attn) {
-        //LOG_DEBUG(" uses flash attention");
+        // LOG_DEBUG(" uses flash attention");
         if (kv_pad != 0) {
-            //LOG_DEBUG(" padding k and v dim1 by %d", kv_pad);
+            // LOG_DEBUG(" padding k and v dim1 by %d", kv_pad);
             k = ggml_pad(ctx, k, 0, kv_pad, 0, 0);
         }
         k = ggml_cast(ctx, k, GGML_TYPE_F16);
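The condition reflow above is purely cosmetic; the gate it encodes is that the flash-attention path is only attempted for head sizes the kernels handle. A standalone restatement of that predicate (assumed equivalent to the condition above, not upstream code):

#include <cstdio>

static bool flash_attn_head_dim_ok(int d_head) {
    return d_head == 64 || d_head == 80 || d_head == 96 ||
           d_head == 112 || d_head == 128 || d_head == 256;
}

int main() {
    printf("%d\n", flash_attn_head_dim_ok(128));  // 1: eligible for flash attention
    printf("%d\n", flash_attn_head_dim_ok(72));   // 0: falls back to regular attention
    return 0;
}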
@@ -1099,6 +1097,8 @@ __STATIC_INLINE__ size_t ggml_tensor_num(ggml_context* ctx) {
 #define MAX_PARAMS_TENSOR_NUM 32768
 #define MAX_GRAPH_SIZE 32768
 
+typedef std::map<std::string, enum ggml_type> String2GGMLType;
+
 struct GGMLRunner {
 protected:
     typedef std::function<struct ggml_cgraph*()> get_graph_cb_t;
@@ -1310,17 +1310,25 @@ protected:
     GGMLBlockMap blocks;
     ParameterMap params;
 
-    void init_blocks(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+    ggml_type get_type(const std::string& name, const String2GGMLType& tensor_types, ggml_type default_type) {
+        auto iter = tensor_types.find(name);
+        if (iter != tensor_types.end()) {
+            return iter->second;
+        }
+        return default_type;
+    }
+
+    void init_blocks(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         for (auto& pair : blocks) {
             auto& block = pair.second;
             block->init(ctx, tensor_types, prefix + pair.first);
         }
     }
 
-    virtual void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {}
+    virtual void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {}
 
 public:
-    void init(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
+    void init(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
         if (prefix.size() > 0) {
             prefix = prefix + ".";
         }
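get_type() is the one piece of new logic in this commit; every other hunk is a call site switching to it. A standalone check of its behavior (enum stubbed so the snippet compiles without ggml):

#include <cassert>
#include <map>
#include <string>

enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16 };  // stand-in for ggml's real enum

typedef std::map<std::string, enum ggml_type> String2GGMLType;

// Same body as the helper added above. find() never inserts, so it works on a
// const map; operator[] default-inserts on a miss, which is what forced the old
// non-const reference parameters.
ggml_type get_type(const std::string& name,
                   const String2GGMLType& tensor_types,
                   ggml_type default_type) {
    auto iter = tensor_types.find(name);
    if (iter != tensor_types.end()) {
        return iter->second;
    }
    return default_type;
}

int main() {
    String2GGMLType types = {{"model.weight", GGML_TYPE_F16}};
    assert(get_type("model.weight", types, GGML_TYPE_F32) == GGML_TYPE_F16);  // hit
    assert(get_type("model.bias", types, GGML_TYPE_F32) == GGML_TYPE_F32);    // miss, default
    return 0;
}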
@@ -1381,8 +1389,8 @@ protected:
     bool bias;
     bool force_f32;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = (tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
         if (in_features % ggml_blck_size(wtype) != 0 || force_f32) {
             wtype = GGML_TYPE_F32;
         }

@@ -1417,8 +1425,8 @@ class Embedding : public UnaryBlock {
 protected:
     int64_t embedding_dim;
     int64_t num_embeddings;
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = (tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
+        enum ggml_type wtype = get_type(prefix + "weight", tensor_types, GGML_TYPE_F32);
         params["weight"] = ggml_new_tensor_2d(ctx, wtype, embedding_dim, num_embeddings);
     }
 
@@ -1457,11 +1465,11 @@ protected:
     std::pair<int, int> dilation;
     bool bias;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = GGML_TYPE_F16; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F16;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
+        enum ggml_type wtype = GGML_TYPE_F16;
         params["weight"] = ggml_new_tensor_4d(ctx, wtype, kernel_size.second, kernel_size.first, in_channels, out_channels);
         if (bias) {
-            enum ggml_type wtype = GGML_TYPE_F32; // (tensor_types.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
+            enum ggml_type wtype = GGML_TYPE_F32;
             params["bias"] = ggml_new_tensor_1d(ctx, wtype, out_channels);
         }
     }

@@ -1502,11 +1510,11 @@ protected:
     int64_t dilation;
     bool bias;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = GGML_TYPE_F16; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F16;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types, const std::string prefix = "") {
+        enum ggml_type wtype = GGML_TYPE_F16;
         params["weight"] = ggml_new_tensor_4d(ctx, wtype, 1, kernel_size, in_channels, out_channels); // 5d => 4d
         if (bias) {
-            enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
+            enum ggml_type wtype = GGML_TYPE_F32;
             params["bias"] = ggml_new_tensor_1d(ctx, wtype, out_channels);
         }
     }

@@ -1546,12 +1554,12 @@ protected:
     bool elementwise_affine;
     bool bias;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         if (elementwise_affine) {
-            enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.ypes.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
+            enum ggml_type wtype = GGML_TYPE_F32;
             params["weight"] = ggml_new_tensor_1d(ctx, wtype, normalized_shape);
             if (bias) {
-                enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.ypes.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
+                enum ggml_type wtype = GGML_TYPE_F32;
                 params["bias"] = ggml_new_tensor_1d(ctx, wtype, normalized_shape);
             }
         }

@@ -1588,10 +1596,10 @@ protected:
     float eps;
     bool affine;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
         if (affine) {
-            enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
-            enum ggml_type bias_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "bias") != tensor_types.end()) ? tensor_types[prefix + "bias"] : GGML_TYPE_F32;
+            enum ggml_type wtype = GGML_TYPE_F32;
+            enum ggml_type bias_wtype = GGML_TYPE_F32;
             params["weight"] = ggml_new_tensor_1d(ctx, wtype, num_channels);
             params["bias"] = ggml_new_tensor_1d(ctx, bias_wtype, num_channels);
         }
mmdit.hpp (16 changed lines)

@@ -147,8 +147,8 @@ protected:
     int64_t hidden_size;
     float eps;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
-        enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
+        enum ggml_type wtype = GGML_TYPE_F32;
         params["weight"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
     }
 
@@ -652,13 +652,13 @@ protected:
     int64_t hidden_size;
     std::string qk_norm;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
-        enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "pos_embed") != tensor_types.end()) ? tensor_types[prefix + "pos_embed"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
+        enum ggml_type wtype = GGML_TYPE_F32;
         params["pos_embed"] = ggml_new_tensor_3d(ctx, wtype, hidden_size, num_patchs, 1);
     }
 
 public:
-    MMDiT(std::map<std::string, enum ggml_type>& tensor_types) {
+    MMDiT(const String2GGMLType& tensor_types = {}) {
         // input_size is always None
         // learn_sigma is always False
         // register_length is alwalys 0

@@ -869,11 +869,9 @@ public:
 struct MMDiTRunner : public GGMLRunner {
     MMDiT mmdit;
 
-    static std::map<std::string, enum ggml_type> empty_tensor_types;
-
     MMDiTRunner(ggml_backend_t backend,
-                std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
+                const String2GGMLType& tensor_types = {},
                 const std::string prefix = "")
         : GGMLRunner(backend), mmdit(tensor_types) {
         mmdit.init(params_ctx, tensor_types, prefix);
     }
model.cpp

@@ -648,7 +648,7 @@ std::string convert_tensor_name(std::string name) {
     return new_name;
 }
 
-void add_preprocess_tensor_storage_types(std::map<std::string, enum ggml_type>& tensor_storages_types, std::string name, enum ggml_type type) {
+void add_preprocess_tensor_storage_types(String2GGMLType& tensor_storages_types, std::string name, enum ggml_type type) {
     std::string new_name = convert_tensor_name(name);
 
     if (new_name.find("cond_stage_model") != std::string::npos && ends_with(new_name, "attn.in_proj_weight")) {
model.h (4 changed lines)

@@ -207,6 +207,8 @@ struct TensorStorage {
 
 typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_cb_t;
 
+typedef std::map<std::string, enum ggml_type> String2GGMLType;
+
 class ModelLoader {
 protected:
     std::vector<std::string> file_paths_;

@@ -225,7 +227,7 @@ protected:
     bool init_from_diffusers_file(const std::string& file_path, const std::string& prefix = "");
 
 public:
-    std::map<std::string, enum ggml_type> tensor_storages_types;
+    String2GGMLType tensor_storages_types;
 
     bool init_from_file(const std::string& file_path, const std::string& prefix = "");
     bool model_is_unet();
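ModelLoader::tensor_storages_types is the map that ultimately feeds all the constructors above, which is why the typedef is declared in model.h as well as in ggml_extend.hpp. A hedged sketch of the wiring (hypothetical call site and file name; only the type and member names come from this diff):

#include "model.h"
#include "diffusion_model.hpp"

int main() {
    ggml_backend_t backend = ggml_backend_cpu_init();

    ModelLoader loader;
    loader.init_from_file("sd-v1-4.safetensors");  // hypothetical model file

    // After this commit the map travels by const reference, so it could also be
    // omitted and default to {}.
    UNetModel unet(backend, loader.tensor_storages_types);
    return 0;
}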
pmid.hpp (7 changed lines)

@@ -623,7 +623,12 @@ public:
     std::vector<float> zeros_right;
 
 public:
-    PhotoMakerIDEncoder(ggml_backend_t backend, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix, SDVersion version = VERSION_SDXL, PMVersion pm_v = PM_VERSION_1, float sty = 20.f)
+    PhotoMakerIDEncoder(ggml_backend_t backend,
+                        const String2GGMLType& tensor_types,
+                        const std::string prefix,
+                        SDVersion version = VERSION_SDXL,
+                        PMVersion pm_v = PM_VERSION_1,
+                        float sty = 20.f)
         : GGMLRunner(backend),
           version(version),
           pm_version(pm_v),
t5.hpp (22 changed lines)

@@ -457,8 +457,8 @@ protected:
     int64_t hidden_size;
     float eps;
 
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "weight") != tensor_types.end()) ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        enum ggml_type wtype = GGML_TYPE_F32;
         params["weight"] = ggml_new_tensor_1d(ctx, wtype, hidden_size);
     }
 
@@ -735,7 +735,7 @@ struct T5Runner : public GGMLRunner {
     std::vector<int> relative_position_bucket_vec;
 
     T5Runner(ggml_backend_t backend,
-             std::map<std::string, enum ggml_type>& tensor_types,
+             const String2GGMLType& tensor_types,
              const std::string prefix,
              int64_t num_layers = 24,
              int64_t model_dim = 4096,

@@ -876,16 +876,14 @@ struct T5Embedder {
     T5UniGramTokenizer tokenizer;
     T5Runner model;
 
-    static std::map<std::string, enum ggml_type> empty_tensor_types;
-
     T5Embedder(ggml_backend_t backend,
-               std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types,
+               const String2GGMLType& tensor_types = {},
                const std::string prefix = "",
                int64_t num_layers = 24,
                int64_t model_dim = 4096,
                int64_t ff_dim = 10240,
                int64_t num_heads = 64,
                int64_t vocab_size = 32128)
         : model(backend, tensor_types, prefix, num_layers, model_dim, ff_dim, num_heads, vocab_size) {
     }
 
tae.hpp (2 changed lines)

@@ -196,7 +196,7 @@ struct TinyAutoEncoder : public GGMLRunner {
     bool decode_only = false;
 
     TinyAutoEncoder(ggml_backend_t backend,
-                    std::map<std::string, enum ggml_type>& tensor_types,
+                    const String2GGMLType& tensor_types,
                     const std::string prefix,
                     bool decoder_only = true,
                     SDVersion version = VERSION_SD1)
unet.hpp (5 changed lines)

@@ -166,7 +166,6 @@ public:
 // ldm.modules.diffusionmodules.openaimodel.UNetModel
 class UnetModelBlock : public GGMLBlock {
 protected:
-    static std::map<std::string, enum ggml_type> empty_tensor_types;
     SDVersion version = VERSION_SD1;
     // network hparams
     int in_channels = 4;

@@ -184,7 +183,7 @@ public:
     int model_channels = 320;
     int adm_in_channels = 2816; // only for VERSION_SDXL/SVD
 
-    UnetModelBlock(SDVersion version = VERSION_SD1, std::map<std::string, enum ggml_type>& tensor_types = empty_tensor_types, bool flash_attn = false)
+    UnetModelBlock(SDVersion version = VERSION_SD1, const String2GGMLType& tensor_types = {}, bool flash_attn = false)
         : version(version) {
         if (sd_version_is_sd2(version)) {
             context_dim = 1024;

@@ -539,7 +538,7 @@ struct UNetModelRunner : public GGMLRunner {
     UnetModelBlock unet;
 
     UNetModelRunner(ggml_backend_t backend,
-                    std::map<std::string, enum ggml_type>& tensor_types,
+                    const String2GGMLType& tensor_types,
                     const std::string prefix,
                     SDVersion version = VERSION_SD1,
                     bool flash_attn = false)
vae.hpp (6 changed lines)

@@ -163,8 +163,8 @@ public:
 
 class VideoResnetBlock : public ResnetBlock {
 protected:
-    void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
-        enum ggml_type wtype = (tensor_types.find(prefix + "mix_factor") != tensor_types.end()) ? tensor_types[prefix + "mix_factor"] : GGML_TYPE_F32;
+    void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = "") {
+        enum ggml_type wtype = get_type(prefix + "mix_factor", tensor_types, GGML_TYPE_F32);
         params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
     }
 
@@ -525,7 +525,7 @@ struct AutoEncoderKL : public GGMLRunner {
     AutoencodingEngine ae;
 
     AutoEncoderKL(ggml_backend_t backend,
-                  std::map<std::string, enum ggml_type>& tensor_types,
+                  const String2GGMLType& tensor_types,
                   const std::string prefix,
                   bool decode_only = false,
                   bool use_video_decoder = false,