Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2025-12-13 05:48:56 +00:00)

Compare commits: b017918106 ... fce6afcc6a (4 commits)

Commits in range:
  fce6afcc6a
  49d6570c43
  6bbaf161ad
  87cdbd5978
.gitignore (vendored, 4 lines changed):

@@ -1,10 +1,10 @@
 build*/
 cmake-build-*/
 test/
-.vscode/
-.idea/
 .cache/
 *.swp
+.vscode/
+.idea/
 *.bat
 *.bin
 *.exe
CMakeLists.txt:

@@ -149,3 +149,7 @@ if (SD_BUILD_EXAMPLES)
     add_subdirectory(examples)
 endif()
 
+set(SD_PUBLIC_HEADERS stable-diffusion.h)
+set_target_properties(${SD_LIB} PROPERTIES PUBLIC_HEADER "${SD_PUBLIC_HEADERS}")
+
+install(TARGETS ${SD_LIB} LIBRARY PUBLIC_HEADER)
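With the new install rule, the library and its public header can be installed with stock CMake tooling; an illustrative sequence (the prefix is a placeholder):

    cmake -B build
    cmake --build build --config Release
    cmake --install build --prefix ~/.local   # installs the sd library plus stable-diffusion.h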
denoiser.hpp (29 lines changed):

@@ -251,6 +251,35 @@ struct KarrasSchedule : SigmaSchedule {
     }
 };
 
+// Close to the beta schedule, but incredibly simple in code.
+struct SmoothStepSchedule : SigmaSchedule {
+    static constexpr float smoothstep(float x) {
+        return x * x * (3.0f - 2.0f * x);
+    }
+
+    std::vector<float> get_sigmas(uint32_t n, float /*sigma_min*/, float /*sigma_max*/, t_to_sigma_t t_to_sigma) override {
+        std::vector<float> result;
+        result.reserve(n + 1);
+
+        const int t_max = TIMESTEPS - 1;
+        if (n == 0) {
+            return result;
+        } else if (n == 1) {
+            result.push_back(t_to_sigma((float)t_max));
+            result.push_back(0.f);
+            return result;
+        }
+
+        for (uint32_t i = 0; i < n; i++) {
+            float u = 1.f - float(i) / float(n);
+            result.push_back(t_to_sigma(std::round(smoothstep(u) * t_max)));
+        }
+
+        result.push_back(0.f);
+        return result;
+    }
+};
+
 struct Denoiser {
     std::shared_ptr<SigmaSchedule> scheduler = std::make_shared<DiscreteSchedule>();
     virtual float sigma_min() = 0;
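A quick way to see what SmoothStepSchedule produces: the cubic smoothstep(u) flattens near u = 0 and u = 1, so sampled timesteps cluster at both ends of the trajectory, roughly mimicking the beta schedule. A minimal standalone sketch (the linear t_to_sigma below is a stand-in for the model's real mapping):

    #include <cmath>
    #include <cstdio>

    static float smoothstep(float x) { return x * x * (3.0f - 2.0f * x); }
    static float fake_t_to_sigma(float t) { return t / 999.0f; }  // placeholder mapping, illustrative only

    int main() {
        const int n = 5, t_max = 999;
        for (int i = 0; i < n; i++) {
            float u = 1.0f - (float)i / (float)n;
            printf("sigma[%d] = %.4f\n", i, fake_t_to_sigma(std::round(smoothstep(u) * t_max)));
        }
        printf("sigma[%d] = 0.0\n", n);  // trailing zero, as in get_sigmas()
    }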
diffusion_model.hpp:

@@ -95,8 +95,9 @@ struct MMDiTModel : public DiffusionModel {
 
     MMDiTModel(ggml_backend_t backend,
                bool offload_params_to_cpu,
+               bool flash_attn = false,
                const String2GGMLType& tensor_types = {})
-        : mmdit(backend, offload_params_to_cpu, tensor_types, "model.diffusion_model") {
+        : mmdit(backend, offload_params_to_cpu, flash_attn, tensor_types, "model.diffusion_model") {
     }
 
     std::string get_desc() {
examples/cli/main.cpp:

@@ -238,7 +238,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --skip-layers LAYERS       Layers to skip for SLG steps: (default: [7,8,9])\n");
     printf("  --skip-layer-start START   SLG enabling point: (default: 0.01)\n");
     printf("  --skip-layer-end END       SLG disabling point: (default: 0.2)\n");
-    printf("  --scheduler {discrete, karras, exponential, ays, gits} Denoiser sigma scheduler (default: discrete)\n");
+    printf("  --scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
     printf("  --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
     printf("                             sampling method (default: \"euler_a\")\n");
     printf("  --steps STEPS              number of sample steps (default: 20)\n");

@@ -251,7 +251,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  --high-noise-skip-layers LAYERS       (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
     printf("  --high-noise-skip-layer-start         (high noise) SLG enabling point: (default: 0.01)\n");
     printf("  --high-noise-skip-layer-end END       (high noise) SLG disabling point: (default: 0.2)\n");
-    printf("  --high-noise-scheduler {discrete, karras, exponential, ays, gits} Denoiser sigma scheduler (default: discrete)\n");
+    printf("  --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
     printf("  --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
     printf("                                        (high noise) sampling method (default: \"euler_a\")\n");
     printf("  --high-noise-steps STEPS              (high noise) number of sample steps (default: -1 = auto)\n");
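Once built, the new value is selected like any other scheduler; an illustrative CLI invocation (model path and prompt are placeholders):

    sd -m sd3_medium.safetensors -p "a lovely cat" --scheduler smoothstep --steps 20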
ggml_extend.hpp:

@@ -56,6 +56,25 @@
 #define __STATIC_INLINE__ static inline
 #endif
 
+__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void*) {
+    switch (level) {
+        case GGML_LOG_LEVEL_DEBUG:
+            LOG_DEBUG(text);
+            break;
+        case GGML_LOG_LEVEL_INFO:
+            LOG_INFO(text);
+            break;
+        case GGML_LOG_LEVEL_WARN:
+            LOG_WARN(text);
+            break;
+        case GGML_LOG_LEVEL_ERROR:
+            LOG_ERROR(text);
+            break;
+        default:
+            LOG_DEBUG(text);
+    }
+}
+
 static_assert(GGML_MAX_NAME >= 128, "GGML_MAX_NAME must be at least 128");
 
 // n-mode tensor-matrix product

@@ -124,13 +143,6 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                          b);
 }
 
-__STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const char* text, void* user_data) {
-    (void)level;
-    (void)user_data;
-    fputs(text, stderr);
-    fflush(stderr);
-}
-
 __STATIC_INLINE__ void ggml_tensor_set_f32_randn(struct ggml_tensor* tensor, std::shared_ptr<RNG> rng) {
     uint32_t n = (uint32_t)ggml_nelements(tensor);
     std::vector<float> random_numbers = rng->randn(n);
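The net effect of these two hunks: ggml backend messages, which previously went straight to stderr with their level discarded, are now routed through the project's leveled LOG_* macros, so they honor the user's log callback and verbosity settings. A minimal sketch of what a consumer sees, assuming the public sd_set_log_callback API from stable-diffusion.h:

    #include "stable-diffusion.h"
    #include <cstdio>

    // Receives sd's own logs and, after this change, forwarded ggml backend logs too.
    static void on_log(enum sd_log_level_t level, const char* text, void* /*data*/) {
        fprintf(stderr, "[level %d] %s", (int)level, text);
    }

    int main() {
        sd_set_log_callback(on_log, nullptr);
        // ... construct an sd_ctx_t; backend init messages now arrive via on_log
    }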
mmdit.hpp (53 lines changed):

@@ -147,14 +147,16 @@ public:
     int64_t num_heads;
     bool pre_only;
     std::string qk_norm;
+    bool flash_attn;
 
 public:
     SelfAttention(int64_t dim,
                   int64_t num_heads   = 8,
                   std::string qk_norm = "",
                   bool qkv_bias       = false,
-                  bool pre_only       = false)
-        : num_heads(num_heads), pre_only(pre_only), qk_norm(qk_norm) {
+                  bool pre_only       = false,
+                  bool flash_attn     = false)
        : num_heads(num_heads), pre_only(pre_only), qk_norm(qk_norm), flash_attn(flash_attn) {
         int64_t d_head = dim / num_heads;
         blocks["qkv"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim * 3, qkv_bias));
         if (!pre_only) {

@@ -206,8 +208,8 @@ public:
                                ggml_backend_t backend,
                                struct ggml_tensor* x) {
         auto qkv = pre_attention(ctx, x);
-        x = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads);  // [N, n_token, dim]
-        x = post_attention(ctx, x);  // [N, n_token, dim]
+        x = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, true);  // [N, n_token, dim]
+        x = post_attention(ctx, x);                                                                            // [N, n_token, dim]
         return x;
     }
 };

@@ -232,6 +234,7 @@ public:
     int64_t num_heads;
     bool pre_only;
     bool self_attn;
+    bool flash_attn;
 
 public:
     DismantledBlock(int64_t hidden_size,

@@ -240,16 +243,17 @@ public:
                     std::string qk_norm = "",
                     bool qkv_bias       = false,
                     bool pre_only       = false,
-                    bool self_attn      = false)
+                    bool self_attn      = false,
+                    bool flash_attn     = false)
         : num_heads(num_heads), pre_only(pre_only), self_attn(self_attn) {
         // rmsnorm is always False
         // scale_mod_only is always False
         // swiglu is always False
         blocks["norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
-        blocks["attn"]  = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, pre_only));
+        blocks["attn"]  = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, pre_only, flash_attn));
 
         if (self_attn) {
-            blocks["attn2"] = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, false));
+            blocks["attn2"] = std::shared_ptr<GGMLBlock>(new SelfAttention(hidden_size, num_heads, qk_norm, qkv_bias, false, flash_attn));
         }
 
         if (!pre_only) {

@@ -435,8 +439,8 @@ public:
         auto qkv2          = std::get<1>(qkv_intermediates);
         auto intermediates = std::get<2>(qkv_intermediates);
 
-        auto attn_out  = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads);  // [N, n_token, dim]
-        auto attn2_out = ggml_nn_attention_ext(ctx, backend, qkv2[0], qkv2[1], qkv2[2], num_heads);  // [N, n_token, dim]
+        auto attn_out  = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, flash_attn);  // [N, n_token, dim]
+        auto attn2_out = ggml_nn_attention_ext(ctx, backend, qkv2[0], qkv2[1], qkv2[2], num_heads, NULL, false, false, flash_attn);  // [N, n_token, dim]
         x = post_attention_x(ctx,
                              attn_out,
                              attn2_out,

@@ -452,7 +456,7 @@ public:
         auto qkv           = qkv_intermediates.first;
         auto intermediates = qkv_intermediates.second;
 
-        auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads);  // [N, n_token, dim]
+        auto attn_out = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], num_heads, NULL, false, false, flash_attn);  // [N, n_token, dim]
         x = post_attention(ctx,
                            attn_out,
                            intermediates[0],

@@ -468,6 +472,7 @@ public:
 __STATIC_INLINE__ std::pair<struct ggml_tensor*, struct ggml_tensor*>
 block_mixing(struct ggml_context* ctx,
              ggml_backend_t backend,
+             bool flash_attn,
              struct ggml_tensor* context,
              struct ggml_tensor* x,
              struct ggml_tensor* c,

@@ -497,8 +502,8 @@ block_mixing(struct ggml_context* ctx,
         qkv.push_back(ggml_concat(ctx, context_qkv[i], x_qkv[i], 1));
     }
 
-    auto attn = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], x_block->num_heads);  // [N, n_context + n_token, hidden_size]
-    attn      = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3));  // [n_context + n_token, N, hidden_size]
+    auto attn = ggml_nn_attention_ext(ctx, backend, qkv[0], qkv[1], qkv[2], x_block->num_heads, NULL, false, false, flash_attn);  // [N, n_context + n_token, hidden_size]
+    attn      = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3));                                                              // [n_context + n_token, N, hidden_size]
     auto context_attn = ggml_view_3d(ctx,
                                      attn,
                                      attn->ne[0],

@@ -556,6 +561,8 @@ block_mixing(struct ggml_context* ctx,
 }
 
 struct JointBlock : public GGMLBlock {
+    bool flash_attn;
+
 public:
     JointBlock(int64_t hidden_size,
                int64_t num_heads,

@@ -563,9 +570,11 @@ public:
                std::string qk_norm = "",
                bool qkv_bias       = false,
                bool pre_only       = false,
-               bool self_attn_x    = false) {
-        blocks["context_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, pre_only));
-        blocks["x_block"]       = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, false, self_attn_x));
+               bool self_attn_x    = false,
+               bool flash_attn     = false)
+        : flash_attn(flash_attn) {
+        blocks["context_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, pre_only, false, flash_attn));
+        blocks["x_block"]       = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, false, self_attn_x, flash_attn));
     }
 
     std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(struct ggml_context* ctx,

@@ -576,7 +585,7 @@ public:
         auto context_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["context_block"]);
         auto x_block       = std::dynamic_pointer_cast<DismantledBlock>(blocks["x_block"]);
 
-        return block_mixing(ctx, backend, context, x, c, context_block, x_block);
+        return block_mixing(ctx, backend, flash_attn, context, x, c, context_block, x_block);
     }
 };

@@ -634,6 +643,7 @@ protected:
     int64_t context_embedder_out_dim = 1536;
     int64_t hidden_size;
     std::string qk_norm;
+    bool flash_attn = false;
 
     void init_params(struct ggml_context* ctx, const String2GGMLType& tensor_types = {}, std::string prefix = "") {
         enum ggml_type wtype = GGML_TYPE_F32;

@@ -641,7 +651,8 @@ protected:
     }
 
 public:
-    MMDiT(const String2GGMLType& tensor_types = {}) {
+    MMDiT(bool flash_attn = false, const String2GGMLType& tensor_types = {})
+        : flash_attn(flash_attn) {
         // input_size is always None
         // learn_sigma is always False
         // register_length is always 0

@@ -709,7 +720,8 @@ public:
                                                qk_norm,
                                                true,
                                                i == depth - 1,
-                                               i <= d_self));
+                                               i <= d_self,
+                                               flash_attn));
         }
 
         blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new FinalLayer(hidden_size, patch_size, out_channels));

@@ -856,9 +868,10 @@ struct MMDiTRunner : public GGMLRunner {
 
     MMDiTRunner(ggml_backend_t backend,
                 bool offload_params_to_cpu,
+                bool flash_attn,
                 const String2GGMLType& tensor_types = {},
                 const std::string prefix            = "")
-        : GGMLRunner(backend, offload_params_to_cpu), mmdit(tensor_types) {
+        : GGMLRunner(backend, offload_params_to_cpu), mmdit(flash_attn, tensor_types) {
         mmdit.init(params_ctx, tensor_types, prefix);
     }

@@ -957,7 +970,7 @@ struct MMDiTRunner : public GGMLRunner {
     // ggml_backend_t backend = ggml_backend_cuda_init(0);
     ggml_backend_t backend     = ggml_backend_cpu_init();
     ggml_type model_data_type  = GGML_TYPE_F16;
-    std::shared_ptr<MMDiTRunner> mmdit = std::shared_ptr<MMDiTRunner>(new MMDiTRunner(backend, false));
+    std::shared_ptr<MMDiTRunner> mmdit = std::shared_ptr<MMDiTRunner>(new MMDiTRunner(backend, false, false));
     {
         LOG_INFO("loading from '%s'", file_path.c_str());
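Taken together, the flag travels MMDiTRunner -> MMDiT -> JointBlock -> DismantledBlock -> SelfAttention, ending at the ggml_nn_attention_ext call sites. A hedged usage sketch of the updated constructor (not a full program; assumes mmdit.hpp and a ggml backend are available):

    ggml_backend_t backend = ggml_backend_cpu_init();
    MMDiTRunner mmdit_runner(backend,
                             /*offload_params_to_cpu=*/false,
                             /*flash_attn=*/true);  // third argument is the new flag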
stable-diffusion.cpp:

@@ -145,7 +145,6 @@ public:
 #endif
 #ifdef SD_USE_METAL
         LOG_DEBUG("Using Metal backend");
-        ggml_log_set(ggml_log_callback_default, nullptr);
         backend = ggml_backend_metal_init();
 #endif
 #ifdef SD_USE_VULKAN

@@ -192,6 +191,8 @@ public:
             rng = std::make_shared<PhiloxRNG>();
         }
 
+        ggml_log_set(ggml_log_callback_default, nullptr);
+
         init_backend();
 
         ModelLoader model_loader;

@@ -349,6 +350,7 @@ public:
                 model_loader.tensor_storages_types);
             diffusion_model = std::make_shared<MMDiTModel>(backend,
                                                            offload_params_to_cpu,
+                                                           sd_ctx_params->diffusion_flash_attn,
                                                            model_loader.tensor_storages_types);
         } else if (sd_version_is_flux(version)) {
             bool is_chroma = false;

@@ -750,6 +752,10 @@ public:
                 denoiser->scheduler          = std::make_shared<GITSSchedule>();
                 denoiser->scheduler->version = version;
                 break;
+            case SMOOTHSTEP:
+                LOG_INFO("Running with SmoothStep scheduler");
+                denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
+                break;
             case DEFAULT:
                 // Don't touch anything.
                 break;

@@ -1533,6 +1539,7 @@ const char* schedule_to_str[] = {
     "exponential",
     "ays",
     "gits",
+    "smoothstep",
 };
 
 const char* sd_schedule_name(enum scheduler_t scheduler) {
stable-diffusion.h:

@@ -57,6 +57,7 @@ enum scheduler_t {
     EXPONENTIAL,
     AYS,
     GITS,
+    SMOOTHSTEP,
     SCHEDULE_COUNT
 };
upscaler.cpp:

@@ -19,13 +19,13 @@ struct UpscalerGGML {
 
     bool load_from_file(const std::string& esrgan_path,
                         bool offload_params_to_cpu) {
+        ggml_log_set(ggml_log_callback_default, nullptr);
 #ifdef SD_USE_CUDA
         LOG_DEBUG("Using CUDA backend");
         backend = ggml_backend_cuda_init(0);
 #endif
 #ifdef SD_USE_METAL
         LOG_DEBUG("Using Metal backend");
-        ggml_log_set(ggml_log_callback_default, nullptr);
         backend = ggml_backend_metal_init();
 #endif
 #ifdef SD_USE_VULKAN
util.cpp (5 lines changed):

@@ -414,7 +414,10 @@ void log_printf(sd_log_level_t level, const char* file, int line, const char* fo
     if (written >= 0 && written < LOG_BUFFER_SIZE) {
         vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
     }
-    strncat(log_buffer, "\n", LOG_BUFFER_SIZE - strlen(log_buffer));
+    size_t len = strlen(log_buffer);
+    if (log_buffer[len - 1] != '\n') {
+        strncat(log_buffer, "\n", LOG_BUFFER_SIZE - len);
+    }
 
     if (sd_log_cb) {
         sd_log_cb(level, log_buffer, sd_log_cb_data);
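The old code appended '\n' unconditionally, so messages that already ended in a newline came out double-spaced. A standalone sketch of the guarded append (the function name is illustrative; the extra len == 0 check also avoids reading buf[-1] on an empty buffer):

    #include <cstring>

    static void ensure_trailing_newline(char* buf, std::size_t cap) {
        std::size_t len = std::strlen(buf);
        if (len == 0 || buf[len - 1] != '\n') {
            std::strncat(buf, "\n", cap - len - 1);  // leave room for the terminating NUL
        }
    }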