mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-03-25 10:48:50 +00:00
Compare commits
No commits in common. "b88cc32346add6d18a0cbc55d68de08533d4ca53" and "742a7333c3a054a62630262496edb5cb1ebcd0ae" have entirely different histories.
b88cc32346
...
742a7333c3
@ -242,18 +242,14 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// net_1 is nn.Dropout(), skip for inference
|
// net_1 is nn.Dropout(), skip for inference
|
||||||
bool force_prec_f32 = false;
|
float scale = 1.f;
|
||||||
float scale = 1.f;
|
|
||||||
if (precision_fix) {
|
if (precision_fix) {
|
||||||
scale = 1.f / 128.f;
|
scale = 1.f / 128.f;
|
||||||
#ifdef SD_USE_VULKAN
|
|
||||||
force_prec_f32 = true;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
// The purpose of the scale here is to prevent NaN issues in certain situations.
|
// The purpose of the scale here is to prevent NaN issues in certain situations.
|
||||||
// For example, when using Vulkan without enabling force_prec_f32,
|
// For example, when using Vulkan without enabling force_prec_f32,
|
||||||
// or when using CUDA but the weights are k-quants.
|
// or when using CUDA but the weights are k-quants.
|
||||||
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32, scale));
|
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, false, scale));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
||||||
|
|||||||
@ -95,7 +95,6 @@ Options:
|
|||||||
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
||||||
type of the weight file
|
type of the weight file
|
||||||
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
|
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
|
||||||
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
|
|
||||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||||
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
|
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
|
||||||
|
|||||||
@ -110,22 +110,21 @@ struct SDParams {
|
|||||||
int fps = 16;
|
int fps = 16;
|
||||||
float vace_strength = 1.f;
|
float vace_strength = 1.f;
|
||||||
|
|
||||||
float strength = 0.75f;
|
float strength = 0.75f;
|
||||||
float control_strength = 0.9f;
|
float control_strength = 0.9f;
|
||||||
rng_type_t rng_type = CUDA_RNG;
|
rng_type_t rng_type = CUDA_RNG;
|
||||||
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
|
int64_t seed = 42;
|
||||||
int64_t seed = 42;
|
bool verbose = false;
|
||||||
bool verbose = false;
|
bool offload_params_to_cpu = false;
|
||||||
bool offload_params_to_cpu = false;
|
bool control_net_cpu = false;
|
||||||
bool control_net_cpu = false;
|
bool clip_on_cpu = false;
|
||||||
bool clip_on_cpu = false;
|
bool vae_on_cpu = false;
|
||||||
bool vae_on_cpu = false;
|
bool diffusion_flash_attn = false;
|
||||||
bool diffusion_flash_attn = false;
|
bool diffusion_conv_direct = false;
|
||||||
bool diffusion_conv_direct = false;
|
bool vae_conv_direct = false;
|
||||||
bool vae_conv_direct = false;
|
bool canny_preprocess = false;
|
||||||
bool canny_preprocess = false;
|
bool color = false;
|
||||||
bool color = false;
|
int upscale_repeats = 1;
|
||||||
int upscale_repeats = 1;
|
|
||||||
|
|
||||||
// Photo Maker
|
// Photo Maker
|
||||||
std::string photo_maker_path;
|
std::string photo_maker_path;
|
||||||
@ -215,7 +214,6 @@ void print_params(SDParams params) {
|
|||||||
printf(" flow_shift: %.2f\n", params.flow_shift);
|
printf(" flow_shift: %.2f\n", params.flow_shift);
|
||||||
printf(" strength(img2img): %.2f\n", params.strength);
|
printf(" strength(img2img): %.2f\n", params.strength);
|
||||||
printf(" rng: %s\n", sd_rng_type_name(params.rng_type));
|
printf(" rng: %s\n", sd_rng_type_name(params.rng_type));
|
||||||
printf(" sampler rng: %s\n", sd_rng_type_name(params.sampler_rng_type));
|
|
||||||
printf(" seed: %zd\n", params.seed);
|
printf(" seed: %zd\n", params.seed);
|
||||||
printf(" batch_count: %d\n", params.batch_count);
|
printf(" batch_count: %d\n", params.batch_count);
|
||||||
printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false");
|
printf(" vae_tiling: %s\n", params.vae_tiling_params.enabled ? "true" : "false");
|
||||||
@ -888,20 +886,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
return 1;
|
return 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto on_sampler_rng_arg = [&](int argc, const char** argv, int index) {
|
|
||||||
if (++index >= argc) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
const char* arg = argv[index];
|
|
||||||
params.sampler_rng_type = str_to_rng_type(arg);
|
|
||||||
if (params.sampler_rng_type == RNG_TYPE_COUNT) {
|
|
||||||
fprintf(stderr, "error: invalid sampler rng type %s\n",
|
|
||||||
arg);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto on_schedule_arg = [&](int argc, const char** argv, int index) {
|
auto on_schedule_arg = [&](int argc, const char** argv, int index) {
|
||||||
if (++index >= argc) {
|
if (++index >= argc) {
|
||||||
return -1;
|
return -1;
|
||||||
@ -1142,10 +1126,6 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
"--rng",
|
"--rng",
|
||||||
"RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)",
|
"RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)",
|
||||||
on_rng_arg},
|
on_rng_arg},
|
||||||
{"",
|
|
||||||
"--sampler-rng",
|
|
||||||
"sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng",
|
|
||||||
on_sampler_rng_arg},
|
|
||||||
{"-s",
|
{"-s",
|
||||||
"--seed",
|
"--seed",
|
||||||
"RNG seed (default: 42, use random seed for < 0)",
|
"RNG seed (default: 42, use random seed for < 0)",
|
||||||
@ -1261,6 +1241,10 @@ void parse_args(int argc, const char** argv, SDParams& params) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (params.mode != CONVERT && params.tensor_type_rules.size() > 0) {
|
||||||
|
fprintf(stderr, "warning: --tensor-type-rules is currently supported only for conversion\n");
|
||||||
|
}
|
||||||
|
|
||||||
if (params.mode == VID_GEN && params.video_frames <= 0) {
|
if (params.mode == VID_GEN && params.video_frames <= 0) {
|
||||||
fprintf(stderr, "warning: --video-frames must be at least 1\n");
|
fprintf(stderr, "warning: --video-frames must be at least 1\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
@ -1339,9 +1323,6 @@ std::string get_image_params(SDParams params, int64_t seed) {
|
|||||||
parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
|
parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
|
||||||
parameter_string += "Model: " + sd_basename(params.model_path) + ", ";
|
parameter_string += "Model: " + sd_basename(params.model_path) + ", ";
|
||||||
parameter_string += "RNG: " + std::string(sd_rng_type_name(params.rng_type)) + ", ";
|
parameter_string += "RNG: " + std::string(sd_rng_type_name(params.rng_type)) + ", ";
|
||||||
if (params.sampler_rng_type != RNG_TYPE_COUNT) {
|
|
||||||
parameter_string += "Sampler RNG: " + std::string(sd_rng_type_name(params.sampler_rng_type)) + ", ";
|
|
||||||
}
|
|
||||||
parameter_string += "Sampler: " + std::string(sd_sample_method_name(params.sample_params.sample_method));
|
parameter_string += "Sampler: " + std::string(sd_sample_method_name(params.sample_params.sample_method));
|
||||||
if (params.sample_params.scheduler != DEFAULT) {
|
if (params.sample_params.scheduler != DEFAULT) {
|
||||||
parameter_string += " " + std::string(sd_schedule_name(params.sample_params.scheduler));
|
parameter_string += " " + std::string(sd_schedule_name(params.sample_params.scheduler));
|
||||||
@ -1775,13 +1756,11 @@ int main(int argc, const char* argv[]) {
|
|||||||
params.lora_model_dir.c_str(),
|
params.lora_model_dir.c_str(),
|
||||||
params.embedding_dir.c_str(),
|
params.embedding_dir.c_str(),
|
||||||
params.photo_maker_path.c_str(),
|
params.photo_maker_path.c_str(),
|
||||||
params.tensor_type_rules.c_str(),
|
|
||||||
vae_decode_only,
|
vae_decode_only,
|
||||||
true,
|
true,
|
||||||
params.n_threads,
|
params.n_threads,
|
||||||
params.wtype,
|
params.wtype,
|
||||||
params.rng_type,
|
params.rng_type,
|
||||||
params.sampler_rng_type,
|
|
||||||
params.prediction,
|
params.prediction,
|
||||||
params.lora_apply_mode,
|
params.lora_apply_mode,
|
||||||
params.offload_params_to_cpu,
|
params.offload_params_to_cpu,
|
||||||
|
|||||||
87
model.cpp
87
model.cpp
@ -1254,59 +1254,15 @@ std::map<ggml_type, uint32_t> ModelLoader::get_vae_wtype_stat() {
|
|||||||
return wtype_stat;
|
return wtype_stat;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std::string& tensor_type_rules) {
|
void ModelLoader::set_wtype_override(ggml_type wtype, std::string prefix) {
|
||||||
std::vector<std::pair<std::string, ggml_type>> result;
|
|
||||||
for (const auto& item : split_string(tensor_type_rules, ',')) {
|
|
||||||
if (item.size() == 0)
|
|
||||||
continue;
|
|
||||||
std::string::size_type pos = item.find('=');
|
|
||||||
if (pos == std::string::npos) {
|
|
||||||
LOG_WARN("ignoring invalid quant override \"%s\"", item.c_str());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
std::string tensor_pattern = item.substr(0, pos);
|
|
||||||
std::string type_name = item.substr(pos + 1);
|
|
||||||
|
|
||||||
ggml_type tensor_type = GGML_TYPE_COUNT;
|
|
||||||
|
|
||||||
if (type_name == "f32") {
|
|
||||||
tensor_type = GGML_TYPE_F32;
|
|
||||||
} else {
|
|
||||||
for (size_t i = 0; i < GGML_TYPE_COUNT; i++) {
|
|
||||||
auto trait = ggml_get_type_traits((ggml_type)i);
|
|
||||||
if (trait->to_float && trait->type_size && type_name == trait->type_name) {
|
|
||||||
tensor_type = (ggml_type)i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tensor_type != GGML_TYPE_COUNT) {
|
|
||||||
result.emplace_back(tensor_pattern, tensor_type);
|
|
||||||
} else {
|
|
||||||
LOG_WARN("ignoring invalid quant override \"%s\"", item.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ModelLoader::set_wtype_override(ggml_type wtype, std::string tensor_type_rules) {
|
|
||||||
auto map_rules = parse_tensor_type_rules(tensor_type_rules);
|
|
||||||
for (auto& [name, tensor_storage] : tensor_storage_map) {
|
for (auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
ggml_type dst_type = wtype;
|
if (!starts_with(name, prefix)) {
|
||||||
for (const auto& tensor_type_rule : map_rules) {
|
|
||||||
std::regex pattern(tensor_type_rule.first);
|
|
||||||
if (std::regex_search(name, pattern)) {
|
|
||||||
dst_type = tensor_type_rule.second;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (dst_type == GGML_TYPE_COUNT) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!tensor_should_be_converted(tensor_storage, dst_type)) {
|
if (!tensor_should_be_converted(tensor_storage, wtype)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
tensor_storage.expected_type = dst_type;
|
tensor_storage.expected_type = wtype;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1647,6 +1603,41 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::pair<std::string, ggml_type>> parse_tensor_type_rules(const std::string& tensor_type_rules) {
|
||||||
|
std::vector<std::pair<std::string, ggml_type>> result;
|
||||||
|
for (const auto& item : split_string(tensor_type_rules, ',')) {
|
||||||
|
if (item.size() == 0)
|
||||||
|
continue;
|
||||||
|
std::string::size_type pos = item.find('=');
|
||||||
|
if (pos == std::string::npos) {
|
||||||
|
LOG_WARN("ignoring invalid quant override \"%s\"", item.c_str());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
std::string tensor_pattern = item.substr(0, pos);
|
||||||
|
std::string type_name = item.substr(pos + 1);
|
||||||
|
|
||||||
|
ggml_type tensor_type = GGML_TYPE_COUNT;
|
||||||
|
|
||||||
|
if (type_name == "f32") {
|
||||||
|
tensor_type = GGML_TYPE_F32;
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < GGML_TYPE_COUNT; i++) {
|
||||||
|
auto trait = ggml_get_type_traits((ggml_type)i);
|
||||||
|
if (trait->to_float && trait->type_size && type_name == trait->type_name) {
|
||||||
|
tensor_type = (ggml_type)i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tensor_type != GGML_TYPE_COUNT) {
|
||||||
|
result.emplace_back(tensor_pattern, tensor_type);
|
||||||
|
} else {
|
||||||
|
LOG_WARN("ignoring invalid quant override \"%s\"", item.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type) {
|
bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type) {
|
||||||
const std::string& name = tensor_storage.name;
|
const std::string& name = tensor_storage.name;
|
||||||
if (type != GGML_TYPE_COUNT) {
|
if (type != GGML_TYPE_COUNT) {
|
||||||
|
|||||||
2
model.h
2
model.h
@ -292,7 +292,7 @@ public:
|
|||||||
std::map<ggml_type, uint32_t> get_diffusion_model_wtype_stat();
|
std::map<ggml_type, uint32_t> get_diffusion_model_wtype_stat();
|
||||||
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
|
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
|
||||||
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
|
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
|
||||||
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
|
void set_wtype_override(ggml_type wtype, std::string prefix = "");
|
||||||
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0);
|
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0);
|
||||||
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
||||||
std::set<std::string> ignore_tensors = {},
|
std::set<std::string> ignore_tensors = {},
|
||||||
|
|||||||
@ -94,14 +94,10 @@ namespace Qwen {
|
|||||||
blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
|
blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
|
||||||
blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
|
blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
|
||||||
|
|
||||||
float scale = 1.f / 32.f;
|
float scale = 1.f / 32.f;
|
||||||
bool force_prec_f32 = false;
|
|
||||||
#ifdef SD_USE_VULKAN
|
|
||||||
force_prec_f32 = true;
|
|
||||||
#endif
|
|
||||||
// The purpose of the scale here is to prevent NaN issues in certain situations.
|
// The purpose of the scale here is to prevent NaN issues in certain situations.
|
||||||
// For example when using CUDA but the weights are k-quants (not all prompts).
|
// For example when using CUDA but the weights are k-quants (not all prompts).
|
||||||
blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, force_prec_f32, scale));
|
blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, false, scale));
|
||||||
// to_out.1 is nn.Dropout
|
// to_out.1 is nn.Dropout
|
||||||
|
|
||||||
blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
|
blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
|
||||||
|
|||||||
@ -99,11 +99,10 @@ public:
|
|||||||
bool vae_decode_only = false;
|
bool vae_decode_only = false;
|
||||||
bool free_params_immediately = false;
|
bool free_params_immediately = false;
|
||||||
|
|
||||||
std::shared_ptr<RNG> rng = std::make_shared<PhiloxRNG>();
|
std::shared_ptr<RNG> rng = std::make_shared<STDDefaultRNG>();
|
||||||
std::shared_ptr<RNG> sampler_rng = nullptr;
|
int n_threads = -1;
|
||||||
int n_threads = -1;
|
float scale_factor = 0.18215f;
|
||||||
float scale_factor = 0.18215f;
|
float shift_factor = 0.f;
|
||||||
float shift_factor = 0.f;
|
|
||||||
|
|
||||||
std::shared_ptr<Conditioner> cond_stage_model;
|
std::shared_ptr<Conditioner> cond_stage_model;
|
||||||
std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd or wan2.1 i2v
|
std::shared_ptr<FrozenCLIPVisionEmbedder> clip_vision; // for svd or wan2.1 i2v
|
||||||
@ -189,16 +188,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<RNG> get_rng(rng_type_t rng_type) {
|
|
||||||
if (rng_type == STD_DEFAULT_RNG) {
|
|
||||||
return std::make_shared<STDDefaultRNG>();
|
|
||||||
} else if (rng_type == CPU_RNG) {
|
|
||||||
return std::make_shared<MT19937RNG>();
|
|
||||||
} else { // default: CUDA_RNG
|
|
||||||
return std::make_shared<PhiloxRNG>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool init(const sd_ctx_params_t* sd_ctx_params) {
|
bool init(const sd_ctx_params_t* sd_ctx_params) {
|
||||||
n_threads = sd_ctx_params->n_threads;
|
n_threads = sd_ctx_params->n_threads;
|
||||||
vae_decode_only = sd_ctx_params->vae_decode_only;
|
vae_decode_only = sd_ctx_params->vae_decode_only;
|
||||||
@ -208,11 +197,12 @@ public:
|
|||||||
use_tiny_autoencoder = taesd_path.size() > 0;
|
use_tiny_autoencoder = taesd_path.size() > 0;
|
||||||
offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu;
|
offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu;
|
||||||
|
|
||||||
rng = get_rng(sd_ctx_params->rng_type);
|
if (sd_ctx_params->rng_type == STD_DEFAULT_RNG) {
|
||||||
if (sd_ctx_params->sampler_rng_type != RNG_TYPE_COUNT && sd_ctx_params->sampler_rng_type != sd_ctx_params->rng_type) {
|
rng = std::make_shared<STDDefaultRNG>();
|
||||||
sampler_rng = get_rng(sd_ctx_params->sampler_rng_type);
|
} else if (sd_ctx_params->rng_type == CUDA_RNG) {
|
||||||
} else {
|
rng = std::make_shared<PhiloxRNG>();
|
||||||
sampler_rng = rng;
|
} else if (sd_ctx_params->rng_type == CPU_RNG) {
|
||||||
|
rng = std::make_shared<MT19937RNG>();
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||||
@ -314,12 +304,11 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO("Version: %s ", model_version_to_str[version]);
|
LOG_INFO("Version: %s ", model_version_to_str[version]);
|
||||||
ggml_type wtype = (int)sd_ctx_params->wtype < std::min<int>(SD_TYPE_COUNT, GGML_TYPE_COUNT)
|
ggml_type wtype = (int)sd_ctx_params->wtype < std::min<int>(SD_TYPE_COUNT, GGML_TYPE_COUNT)
|
||||||
? (ggml_type)sd_ctx_params->wtype
|
? (ggml_type)sd_ctx_params->wtype
|
||||||
: GGML_TYPE_COUNT;
|
: GGML_TYPE_COUNT;
|
||||||
std::string tensor_type_rules = SAFE_STR(sd_ctx_params->tensor_type_rules);
|
if (wtype != GGML_TYPE_COUNT) {
|
||||||
if (wtype != GGML_TYPE_COUNT || tensor_type_rules.size() > 0) {
|
model_loader.set_wtype_override(wtype);
|
||||||
model_loader.set_wtype_override(wtype, tensor_type_rules);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<ggml_type, uint32_t> wtype_stat = model_loader.get_wtype_stat();
|
std::map<ggml_type, uint32_t> wtype_stat = model_loader.get_wtype_stat();
|
||||||
@ -1746,7 +1735,7 @@ public:
|
|||||||
return denoised;
|
return denoised;
|
||||||
};
|
};
|
||||||
|
|
||||||
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, sampler_rng, eta);
|
sample_k_diffusion(method, denoise, work_ctx, x, sigmas, rng, eta);
|
||||||
|
|
||||||
if (inverse_noise_scaling) {
|
if (inverse_noise_scaling) {
|
||||||
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
|
x = denoiser->inverse_noise_scaling(sigmas[sigmas.size() - 1], x);
|
||||||
@ -2301,7 +2290,6 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
|||||||
sd_ctx_params->n_threads = get_num_physical_cores();
|
sd_ctx_params->n_threads = get_num_physical_cores();
|
||||||
sd_ctx_params->wtype = SD_TYPE_COUNT;
|
sd_ctx_params->wtype = SD_TYPE_COUNT;
|
||||||
sd_ctx_params->rng_type = CUDA_RNG;
|
sd_ctx_params->rng_type = CUDA_RNG;
|
||||||
sd_ctx_params->sampler_rng_type = RNG_TYPE_COUNT;
|
|
||||||
sd_ctx_params->prediction = DEFAULT_PRED;
|
sd_ctx_params->prediction = DEFAULT_PRED;
|
||||||
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
|
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
|
||||||
sd_ctx_params->offload_params_to_cpu = false;
|
sd_ctx_params->offload_params_to_cpu = false;
|
||||||
@ -2337,13 +2325,11 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
|||||||
"lora_model_dir: %s\n"
|
"lora_model_dir: %s\n"
|
||||||
"embedding_dir: %s\n"
|
"embedding_dir: %s\n"
|
||||||
"photo_maker_path: %s\n"
|
"photo_maker_path: %s\n"
|
||||||
"tensor_type_rules: %s\n"
|
|
||||||
"vae_decode_only: %s\n"
|
"vae_decode_only: %s\n"
|
||||||
"free_params_immediately: %s\n"
|
"free_params_immediately: %s\n"
|
||||||
"n_threads: %d\n"
|
"n_threads: %d\n"
|
||||||
"wtype: %s\n"
|
"wtype: %s\n"
|
||||||
"rng_type: %s\n"
|
"rng_type: %s\n"
|
||||||
"sampler_rng_type: %s\n"
|
|
||||||
"prediction: %s\n"
|
"prediction: %s\n"
|
||||||
"offload_params_to_cpu: %s\n"
|
"offload_params_to_cpu: %s\n"
|
||||||
"keep_clip_on_cpu: %s\n"
|
"keep_clip_on_cpu: %s\n"
|
||||||
@ -2368,13 +2354,11 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
|||||||
SAFE_STR(sd_ctx_params->lora_model_dir),
|
SAFE_STR(sd_ctx_params->lora_model_dir),
|
||||||
SAFE_STR(sd_ctx_params->embedding_dir),
|
SAFE_STR(sd_ctx_params->embedding_dir),
|
||||||
SAFE_STR(sd_ctx_params->photo_maker_path),
|
SAFE_STR(sd_ctx_params->photo_maker_path),
|
||||||
SAFE_STR(sd_ctx_params->tensor_type_rules),
|
|
||||||
BOOL_STR(sd_ctx_params->vae_decode_only),
|
BOOL_STR(sd_ctx_params->vae_decode_only),
|
||||||
BOOL_STR(sd_ctx_params->free_params_immediately),
|
BOOL_STR(sd_ctx_params->free_params_immediately),
|
||||||
sd_ctx_params->n_threads,
|
sd_ctx_params->n_threads,
|
||||||
sd_type_name(sd_ctx_params->wtype),
|
sd_type_name(sd_ctx_params->wtype),
|
||||||
sd_rng_type_name(sd_ctx_params->rng_type),
|
sd_rng_type_name(sd_ctx_params->rng_type),
|
||||||
sd_rng_type_name(sd_ctx_params->sampler_rng_type),
|
|
||||||
sd_prediction_name(sd_ctx_params->prediction),
|
sd_prediction_name(sd_ctx_params->prediction),
|
||||||
BOOL_STR(sd_ctx_params->offload_params_to_cpu),
|
BOOL_STR(sd_ctx_params->offload_params_to_cpu),
|
||||||
BOOL_STR(sd_ctx_params->keep_clip_on_cpu),
|
BOOL_STR(sd_ctx_params->keep_clip_on_cpu),
|
||||||
@ -2673,24 +2657,18 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
LOG_WARN("Turn off PhotoMaker");
|
LOG_WARN("Turn off PhotoMaker");
|
||||||
sd_ctx->sd->stacked_id = false;
|
sd_ctx->sd->stacked_id = false;
|
||||||
} else {
|
} else {
|
||||||
if (pm_params.id_images_count != id_embeds->ne[1]) {
|
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask);
|
||||||
LOG_WARN("PhotoMaker image count (%d) does NOT match ID embeds (%d). You should run face_detect.py again.", pm_params.id_images_count, id_embeds->ne[1]);
|
int64_t t1 = ggml_time_ms();
|
||||||
LOG_WARN("Turn off PhotoMaker");
|
LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
|
||||||
sd_ctx->sd->stacked_id = false;
|
if (sd_ctx->sd->free_params_immediately) {
|
||||||
} else {
|
sd_ctx->sd->pmid_model->free_params_buffer();
|
||||||
id_cond.c_crossattn = sd_ctx->sd->id_encoder(work_ctx, init_img, id_cond.c_crossattn, id_embeds, class_tokens_mask);
|
}
|
||||||
int64_t t1 = ggml_time_ms();
|
// Encode input prompt without the trigger word for delayed conditioning
|
||||||
LOG_INFO("Photomaker ID Stacking, taking %" PRId64 " ms", t1 - t0);
|
prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt);
|
||||||
if (sd_ctx->sd->free_params_immediately) {
|
// printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str());
|
||||||
sd_ctx->sd->pmid_model->free_params_buffer();
|
prompt = prompt_text_only; //
|
||||||
}
|
if (sample_steps < 50) {
|
||||||
// Encode input prompt without the trigger word for delayed conditioning
|
LOG_WARN("It's recommended to use >= 50 steps for photo maker!");
|
||||||
prompt_text_only = sd_ctx->sd->cond_stage_model->remove_trigger_from_prompt(work_ctx, prompt);
|
|
||||||
// printf("%s || %s \n", prompt.c_str(), prompt_text_only.c_str());
|
|
||||||
prompt = prompt_text_only; //
|
|
||||||
if (sample_steps < 50) {
|
|
||||||
LOG_WARN("It's recommended to use >= 50 steps for photo maker!");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -2836,7 +2814,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
|
|||||||
LOG_INFO("generating image: %i/%i - seed %" PRId64, b + 1, batch_count, cur_seed);
|
LOG_INFO("generating image: %i/%i - seed %" PRId64, b + 1, batch_count, cur_seed);
|
||||||
|
|
||||||
sd_ctx->sd->rng->manual_seed(cur_seed);
|
sd_ctx->sd->rng->manual_seed(cur_seed);
|
||||||
sd_ctx->sd->sampler_rng->manual_seed(cur_seed);
|
|
||||||
struct ggml_tensor* x_t = init_latent;
|
struct ggml_tensor* x_t = init_latent;
|
||||||
struct ggml_tensor* noise = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
|
struct ggml_tensor* noise = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, W, H, C, 1);
|
||||||
ggml_ext_im_set_randn_f32(noise, sd_ctx->sd->rng);
|
ggml_ext_im_set_randn_f32(noise, sd_ctx->sd->rng);
|
||||||
@ -2963,7 +2940,6 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
|
|||||||
seed = rand();
|
seed = rand();
|
||||||
}
|
}
|
||||||
sd_ctx->sd->rng->manual_seed(seed);
|
sd_ctx->sd->rng->manual_seed(seed);
|
||||||
sd_ctx->sd->sampler_rng->manual_seed(seed);
|
|
||||||
|
|
||||||
int sample_steps = sd_img_gen_params->sample_params.sample_steps;
|
int sample_steps = sd_img_gen_params->sample_params.sample_steps;
|
||||||
|
|
||||||
@ -3255,7 +3231,6 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
|
|||||||
}
|
}
|
||||||
|
|
||||||
sd_ctx->sd->rng->manual_seed(seed);
|
sd_ctx->sd->rng->manual_seed(seed);
|
||||||
sd_ctx->sd->sampler_rng->manual_seed(seed);
|
|
||||||
|
|
||||||
int64_t t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
|
|
||||||
|
|||||||
@ -167,13 +167,11 @@ typedef struct {
|
|||||||
const char* lora_model_dir;
|
const char* lora_model_dir;
|
||||||
const char* embedding_dir;
|
const char* embedding_dir;
|
||||||
const char* photo_maker_path;
|
const char* photo_maker_path;
|
||||||
const char* tensor_type_rules;
|
|
||||||
bool vae_decode_only;
|
bool vae_decode_only;
|
||||||
bool free_params_immediately;
|
bool free_params_immediately;
|
||||||
int n_threads;
|
int n_threads;
|
||||||
enum sd_type_t wtype;
|
enum sd_type_t wtype;
|
||||||
enum rng_type_t rng_type;
|
enum rng_type_t rng_type;
|
||||||
enum rng_type_t sampler_rng_type;
|
|
||||||
enum prediction_t prediction;
|
enum prediction_t prediction;
|
||||||
enum lora_apply_mode_t lora_apply_mode;
|
enum lora_apply_mode_t lora_apply_mode;
|
||||||
bool offload_params_to_cpu;
|
bool offload_params_to_cpu;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user