mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-02-04 02:43:36 +00:00
feat: add support for Segmind's Vega model (#1195)
This commit is contained in:
parent
9293016c9d
commit
639091fbe9
@ -1,8 +1,8 @@
|
|||||||
# Running distilled models: SSD1B and SDx.x with tiny U-Nets
|
# Running distilled models: SSD1B, Vega and SDx.x with tiny U-Nets
|
||||||
|
|
||||||
## Preface
|
## Preface
|
||||||
|
|
||||||
These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B U-Net contains only one middle block and fewer attention layers in its up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.
|
These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B and Vega U-Net contains only one middle block and fewer attention layers in its up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.
|
||||||
Similarly, SD1.x- and SD2.x-style models with a tiny U-Net consist of only 6 U-Net blocks, leading to very small files and time savings of up to 50%. For more information, see the paper: https://arxiv.org/pdf/2305.15798.pdf.
|
Similarly, SD1.x- and SD2.x-style models with a tiny U-Net consist of only 6 U-Net blocks, leading to very small files and time savings of up to 50%. For more information, see the paper: https://arxiv.org/pdf/2305.15798.pdf.
|
||||||
|
|
||||||
## SSD1B
|
## SSD1B
|
||||||
@ -17,7 +17,17 @@ Useful LoRAs are also available:
|
|||||||
* https://huggingface.co/seungminh/lora-swarovski-SSD-1B/resolve/main/pytorch_lora_weights.safetensors
|
* https://huggingface.co/seungminh/lora-swarovski-SSD-1B/resolve/main/pytorch_lora_weights.safetensors
|
||||||
* https://huggingface.co/kylielee505/mylcmlorassd/resolve/main/pytorch_lora_weights.safetensors
|
* https://huggingface.co/kylielee505/mylcmlorassd/resolve/main/pytorch_lora_weights.safetensors
|
||||||
|
|
||||||
These files can be used out-of-the-box, unlike the models described in the next section.
|
## Vega
|
||||||
|
|
||||||
|
Segmind's Vega model is available online here:
|
||||||
|
|
||||||
|
* https://huggingface.co/segmind/Segmind-Vega/resolve/main/segmind-vega.safetensors
|
||||||
|
|
||||||
|
VegaRT is an example for an LCM-LoRA:
|
||||||
|
|
||||||
|
* https://huggingface.co/segmind/Segmind-VegaRT/resolve/main/pytorch_lora_weights.safetensors
|
||||||
|
|
||||||
|
Both files can be used out-of-the-box, unlike the models described in next sections.
|
||||||
|
|
||||||
|
|
||||||
## SD1.x, SD2.x with tiny U-Nets
|
## SD1.x, SD2.x with tiny U-Nets
|
||||||
|
|||||||
@ -1040,6 +1040,7 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
int64_t patch_embedding_channels = 0;
|
int64_t patch_embedding_channels = 0;
|
||||||
bool has_img_emb = false;
|
bool has_img_emb = false;
|
||||||
bool has_middle_block_1 = false;
|
bool has_middle_block_1 = false;
|
||||||
|
bool has_output_block_311 = false;
|
||||||
bool has_output_block_71 = false;
|
bool has_output_block_71 = false;
|
||||||
|
|
||||||
for (auto& [name, tensor_storage] : tensor_storage_map) {
|
for (auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
@ -1100,6 +1101,9 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {
|
tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {
|
||||||
has_middle_block_1 = true;
|
has_middle_block_1 = true;
|
||||||
}
|
}
|
||||||
|
if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos) {
|
||||||
|
has_output_block_311 = true;
|
||||||
|
}
|
||||||
if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos) {
|
if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos) {
|
||||||
has_output_block_71 = true;
|
has_output_block_71 = true;
|
||||||
}
|
}
|
||||||
@ -1138,6 +1142,9 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
return VERSION_SDXL_PIX2PIX;
|
return VERSION_SDXL_PIX2PIX;
|
||||||
}
|
}
|
||||||
if (!has_middle_block_1) {
|
if (!has_middle_block_1) {
|
||||||
|
if (!has_output_block_311) {
|
||||||
|
return VERSION_SDXL_VEGA;
|
||||||
|
}
|
||||||
return VERSION_SDXL_SSD1B;
|
return VERSION_SDXL_SSD1B;
|
||||||
}
|
}
|
||||||
return VERSION_SDXL;
|
return VERSION_SDXL;
|
||||||
|
|||||||
3
model.h
3
model.h
@ -32,6 +32,7 @@ enum SDVersion {
|
|||||||
VERSION_SDXL,
|
VERSION_SDXL,
|
||||||
VERSION_SDXL_INPAINT,
|
VERSION_SDXL_INPAINT,
|
||||||
VERSION_SDXL_PIX2PIX,
|
VERSION_SDXL_PIX2PIX,
|
||||||
|
VERSION_SDXL_VEGA,
|
||||||
VERSION_SDXL_SSD1B,
|
VERSION_SDXL_SSD1B,
|
||||||
VERSION_SVD,
|
VERSION_SVD,
|
||||||
VERSION_SD3,
|
VERSION_SD3,
|
||||||
@ -66,7 +67,7 @@ static inline bool sd_version_is_sd2(SDVersion version) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline bool sd_version_is_sdxl(SDVersion version) {
|
static inline bool sd_version_is_sdxl(SDVersion version) {
|
||||||
if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX || version == VERSION_SDXL_SSD1B) {
|
if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX || version == VERSION_SDXL_SSD1B || version == VERSION_SDXL_VEGA) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@ -35,6 +35,7 @@ const char* model_version_to_str[] = {
|
|||||||
"SDXL",
|
"SDXL",
|
||||||
"SDXL Inpaint",
|
"SDXL Inpaint",
|
||||||
"SDXL Instruct-Pix2Pix",
|
"SDXL Instruct-Pix2Pix",
|
||||||
|
"SDXL (Vega)",
|
||||||
"SDXL (SSD1B)",
|
"SDXL (SSD1B)",
|
||||||
"SVD",
|
"SVD",
|
||||||
"SD3.x",
|
"SD3.x",
|
||||||
|
|||||||
7
unet.hpp
7
unet.hpp
@ -201,6 +201,9 @@ public:
|
|||||||
num_head_channels = 64;
|
num_head_channels = 64;
|
||||||
num_heads = -1;
|
num_heads = -1;
|
||||||
use_linear_projection = true;
|
use_linear_projection = true;
|
||||||
|
if (version == VERSION_SDXL_VEGA) {
|
||||||
|
transformer_depth = {1, 1, 2};
|
||||||
|
}
|
||||||
} else if (version == VERSION_SVD) {
|
} else if (version == VERSION_SVD) {
|
||||||
in_channels = 8;
|
in_channels = 8;
|
||||||
out_channels = 4;
|
out_channels = 4;
|
||||||
@ -319,7 +322,7 @@ public:
|
|||||||
}
|
}
|
||||||
if (!tiny_unet) {
|
if (!tiny_unet) {
|
||||||
blocks["middle_block.0"] = std::shared_ptr<GGMLBlock>(get_resblock(ch, time_embed_dim, ch));
|
blocks["middle_block.0"] = std::shared_ptr<GGMLBlock>(get_resblock(ch, time_embed_dim, ch));
|
||||||
if (version != VERSION_SDXL_SSD1B) {
|
if (version != VERSION_SDXL_SSD1B && version != VERSION_SDXL_VEGA) {
|
||||||
blocks["middle_block.1"] = std::shared_ptr<GGMLBlock>(get_attention_layer(ch,
|
blocks["middle_block.1"] = std::shared_ptr<GGMLBlock>(get_attention_layer(ch,
|
||||||
n_head,
|
n_head,
|
||||||
d_head,
|
d_head,
|
||||||
@ -520,7 +523,7 @@ public:
|
|||||||
// middle_block
|
// middle_block
|
||||||
if (!tiny_unet) {
|
if (!tiny_unet) {
|
||||||
h = resblock_forward("middle_block.0", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
h = resblock_forward("middle_block.0", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||||
if (version != VERSION_SDXL_SSD1B) {
|
if (version != VERSION_SDXL_SSD1B && version != VERSION_SDXL_VEGA) {
|
||||||
h = attention_layer_forward("middle_block.1", ctx, h, context, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
h = attention_layer_forward("middle_block.1", ctx, h, context, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||||
h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
h = resblock_forward("middle_block.2", ctx, h, emb, num_video_frames); // [N, 4*model_channels, h/8, w/8]
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user