mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-17 03:37:20 +00:00
refactor: route cpu placement through backend specs (#1654)
This commit is contained in:
parent
749186c0eb
commit
5db680c2c7
@ -124,16 +124,16 @@ Runtime and parameter assignments also share the same backend cache. If `--backe
|
|||||||
|
|
||||||
## Compatibility flags
|
## Compatibility flags
|
||||||
|
|
||||||
The older CPU placement flags are still supported:
|
The example CLI/server still accepts these older CPU placement flags as compatibility aliases:
|
||||||
|
|
||||||
- `--clip-on-cpu`
|
- `--clip-on-cpu`
|
||||||
- `--vae-on-cpu`
|
- `--vae-on-cpu`
|
||||||
- `--control-net-cpu`
|
- `--control-net-cpu`
|
||||||
- `--offload-to-cpu`
|
- `--offload-to-cpu`
|
||||||
|
|
||||||
`--clip-on-cpu`, `--vae-on-cpu`, and `--control-net-cpu` affect runtime backend assignment only when `--backend` is not set. They map to `te=cpu`, `vae=cpu`, and `controlnet=cpu`.
|
`--clip-on-cpu`, `--vae-on-cpu`, and `--control-net-cpu` are deprecated. The example argument layer prepends `te=cpu`, `vae=cpu`, and `controlnet=cpu`, respectively, to `--backend` before creating the context.
|
||||||
|
|
||||||
`--offload-to-cpu` prepends a CPU default to the parameter assignment before parsing:
|
`--offload-to-cpu` is handled by the caller, which prepends a CPU default to the parameter assignment before creating the context:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
--params-backend '*=cpu'
|
--params-backend '*=cpu'
|
||||||
@ -141,4 +141,4 @@ The older CPU placement flags are still supported:
|
|||||||
|
|
||||||
Because this default is inserted first, later explicit `--params-backend` entries can still override it, for example `--offload-to-cpu --params-backend te=disk` keeps non-TE parameters on CPU and reloads TE parameters from disk.
|
Because this default is inserted first, later explicit `--params-backend` entries can still override it, for example `--offload-to-cpu --params-backend te=disk` keeps non-TE parameters on CPU and reloads TE parameters from disk.
|
||||||
|
|
||||||
Explicit `--backend` and `--params-backend` assignments are preferred for new commands.
|
Library callers should set `backend` and `params_backend` directly. The old CPU/offload fields are no longer part of the C API. Explicit `--backend` and `--params-backend` assignments are preferred for new commands.
|
||||||
|
|||||||
@ -31,7 +31,7 @@ Use CPU params to reduce VRAM usage:
|
|||||||
--backend cuda0 --params-backend cpu
|
--backend cuda0 --params-backend cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
This keeps model weights in system RAM and moves them to the runtime backend when needed. `--offload-to-cpu` is a compatibility shortcut that prepends `*=cpu` to `--params-backend`, so explicit module assignments can still override it:
|
This keeps model weights in system RAM and moves them to the runtime backend when needed. In the example CLI/server, `--offload-to-cpu` is a compatibility shortcut that prepends `*=cpu` to `--params-backend` before creating the context, so explicit module assignments can still override it:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
--offload-to-cpu --params-backend te=disk
|
--offload-to-cpu --params-backend te=disk
|
||||||
|
|||||||
@ -63,9 +63,9 @@ Context Options:
|
|||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
||||||
when needed
|
when needed
|
||||||
--mmap whether to memory-map model
|
--mmap whether to memory-map model
|
||||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
--control-net-cpu deprecated; use --backend controlnet=cpu
|
||||||
--clip-on-cpu keep clip in cpu (for low vram)
|
--clip-on-cpu deprecated; use --backend te=cpu
|
||||||
--vae-on-cpu keep vae in cpu (for low vram)
|
--vae-on-cpu deprecated; use --backend vae=cpu
|
||||||
--fa use flash attention
|
--fa use flash attention
|
||||||
--diffusion-fa use flash attention in the diffusion model only
|
--diffusion-fa use flash attention in the diffusion model only
|
||||||
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
||||||
|
|||||||
@ -782,12 +782,11 @@ int main(int argc, const char* argv[]) {
|
|||||||
int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth
|
int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth
|
||||||
if (ctx_params.esrgan_path.size() > 0 && gen_params.upscale_repeats > 0) {
|
if (ctx_params.esrgan_path.size() > 0 && gen_params.upscale_repeats > 0) {
|
||||||
UpscalerCtxPtr upscaler_ctx(new_upscaler_ctx(ctx_params.esrgan_path.c_str(),
|
UpscalerCtxPtr upscaler_ctx(new_upscaler_ctx(ctx_params.esrgan_path.c_str(),
|
||||||
ctx_params.offload_params_to_cpu,
|
|
||||||
ctx_params.diffusion_conv_direct,
|
ctx_params.diffusion_conv_direct,
|
||||||
ctx_params.n_threads,
|
ctx_params.n_threads,
|
||||||
gen_params.upscale_tile_size,
|
gen_params.upscale_tile_size,
|
||||||
ctx_params.backend.c_str(),
|
sd_ctx_params.backend,
|
||||||
ctx_params.params_backend.c_str()));
|
sd_ctx_params.params_backend));
|
||||||
|
|
||||||
if (upscaler_ctx == nullptr) {
|
if (upscaler_ctx == nullptr) {
|
||||||
LOG_ERROR("new_upscaler_ctx failed");
|
LOG_ERROR("new_upscaler_ctx failed");
|
||||||
|
|||||||
@ -51,6 +51,10 @@ static sd_vae_format_t str_to_vae_format(const std::string& value) {
|
|||||||
return SD_VAE_FORMAT_COUNT;
|
return SD_VAE_FORMAT_COUNT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prepends `assignment` to the comma-separated backend spec held in `spec`.
//
// The new entry is placed in front of whatever `spec` already contains, so the
// entries that were already present end up after it in the list:
//   spec = ""      + "te=cpu" -> "te=cpu"
//   spec = "cuda0" + "te=cpu" -> "te=cpu,cuda0"
//
// A null or empty `assignment` leaves `spec` untouched; this avoids building a
// std::string from a null pointer and avoids emitting a spec that starts with
// a stray comma.
static void prepend_backend_assignment(std::string& spec, const char* assignment) {
    if (assignment == nullptr || *assignment == '\0') {
        return;
    }
    // Insert the separator first so the assignment lands in front of it.
    if (!spec.empty()) {
        spec.insert(0, ",");
    }
    spec.insert(0, assignment);
}
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
static std::string utf16_to_utf8(const std::wstring& wstr) {
|
static std::string utf16_to_utf8(const std::wstring& wstr) {
|
||||||
if (wstr.empty())
|
if (wstr.empty())
|
||||||
@ -463,15 +467,15 @@ ArgOptions SDContextParams::get_options() {
|
|||||||
true, &enable_mmap},
|
true, &enable_mmap},
|
||||||
{"",
|
{"",
|
||||||
"--control-net-cpu",
|
"--control-net-cpu",
|
||||||
"keep controlnet in cpu (for low vram)",
|
"deprecated; use --backend controlnet=cpu",
|
||||||
true, &control_net_cpu},
|
true, &control_net_cpu},
|
||||||
{"",
|
{"",
|
||||||
"--clip-on-cpu",
|
"--clip-on-cpu",
|
||||||
"keep clip in cpu (for low vram)",
|
"deprecated; use --backend te=cpu",
|
||||||
true, &clip_on_cpu},
|
true, &clip_on_cpu},
|
||||||
{"",
|
{"",
|
||||||
"--vae-on-cpu",
|
"--vae-on-cpu",
|
||||||
"keep vae in cpu (for low vram)",
|
"deprecated; use --backend vae=cpu",
|
||||||
true, &vae_on_cpu},
|
true, &vae_on_cpu},
|
||||||
{"",
|
{"",
|
||||||
"--fa",
|
"--fa",
|
||||||
@ -688,6 +692,25 @@ bool SDContextParams::resolve_and_validate(SDMode mode) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SDContextParams::prepare_backend_assignments() {
|
||||||
|
effective_backend = backend;
|
||||||
|
effective_params_backend = params_backend;
|
||||||
|
|
||||||
|
if (offload_params_to_cpu) {
|
||||||
|
prepend_backend_assignment(effective_params_backend, "*=cpu");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clip_on_cpu) {
|
||||||
|
prepend_backend_assignment(effective_backend, "te=cpu");
|
||||||
|
}
|
||||||
|
if (vae_on_cpu) {
|
||||||
|
prepend_backend_assignment(effective_backend, "vae=cpu");
|
||||||
|
}
|
||||||
|
if (control_net_cpu) {
|
||||||
|
prepend_backend_assignment(effective_backend, "controlnet=cpu");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string SDContextParams::to_string() const {
|
std::string SDContextParams::to_string() const {
|
||||||
std::ostringstream emb_ss;
|
std::ostringstream emb_ss;
|
||||||
emb_ss << "{\n";
|
emb_ss << "{\n";
|
||||||
@ -758,6 +781,7 @@ std::string SDContextParams::to_string() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool taesd_preview) {
|
sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool taesd_preview) {
|
||||||
|
prepare_backend_assignments();
|
||||||
embedding_vec.clear();
|
embedding_vec.clear();
|
||||||
embedding_vec.reserve(embedding_map.size());
|
embedding_vec.reserve(embedding_map.size());
|
||||||
for (const auto& kv : embedding_map) {
|
for (const auto& kv : embedding_map) {
|
||||||
@ -767,55 +791,51 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool taesd_preview) {
|
|||||||
embedding_vec.emplace_back(item);
|
embedding_vec.emplace_back(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
sd_ctx_params_t sd_ctx_params = {
|
sd_ctx_params_t sd_ctx_params;
|
||||||
model_path.c_str(),
|
sd_ctx_params_init(&sd_ctx_params);
|
||||||
clip_l_path.c_str(),
|
sd_ctx_params.model_path = model_path.c_str();
|
||||||
clip_g_path.c_str(),
|
sd_ctx_params.clip_l_path = clip_l_path.c_str();
|
||||||
clip_vision_path.c_str(),
|
sd_ctx_params.clip_g_path = clip_g_path.c_str();
|
||||||
t5xxl_path.c_str(),
|
sd_ctx_params.clip_vision_path = clip_vision_path.c_str();
|
||||||
llm_path.c_str(),
|
sd_ctx_params.t5xxl_path = t5xxl_path.c_str();
|
||||||
llm_vision_path.c_str(),
|
sd_ctx_params.llm_path = llm_path.c_str();
|
||||||
diffusion_model_path.c_str(),
|
sd_ctx_params.llm_vision_path = llm_vision_path.c_str();
|
||||||
high_noise_diffusion_model_path.c_str(),
|
sd_ctx_params.diffusion_model_path = diffusion_model_path.c_str();
|
||||||
uncond_diffusion_model_path.c_str(),
|
sd_ctx_params.high_noise_diffusion_model_path = high_noise_diffusion_model_path.c_str();
|
||||||
embeddings_connectors_path.c_str(),
|
sd_ctx_params.uncond_diffusion_model_path = uncond_diffusion_model_path.c_str();
|
||||||
vae_path.c_str(),
|
sd_ctx_params.embeddings_connectors_path = embeddings_connectors_path.c_str();
|
||||||
audio_vae_path.c_str(),
|
sd_ctx_params.vae_path = vae_path.c_str();
|
||||||
taesd_path.c_str(),
|
sd_ctx_params.audio_vae_path = audio_vae_path.c_str();
|
||||||
control_net_path.c_str(),
|
sd_ctx_params.taesd_path = taesd_path.c_str();
|
||||||
embedding_vec.data(),
|
sd_ctx_params.control_net_path = control_net_path.c_str();
|
||||||
static_cast<uint32_t>(embedding_vec.size()),
|
sd_ctx_params.embeddings = embedding_vec.data();
|
||||||
photo_maker_path.c_str(),
|
sd_ctx_params.embedding_count = static_cast<uint32_t>(embedding_vec.size());
|
||||||
tensor_type_rules.c_str(),
|
sd_ctx_params.photo_maker_path = photo_maker_path.c_str();
|
||||||
n_threads,
|
sd_ctx_params.tensor_type_rules = tensor_type_rules.c_str();
|
||||||
wtype,
|
sd_ctx_params.n_threads = n_threads;
|
||||||
rng_type,
|
sd_ctx_params.wtype = wtype;
|
||||||
sampler_rng_type,
|
sd_ctx_params.rng_type = rng_type;
|
||||||
prediction,
|
sd_ctx_params.sampler_rng_type = sampler_rng_type;
|
||||||
lora_apply_mode,
|
sd_ctx_params.prediction = prediction;
|
||||||
offload_params_to_cpu,
|
sd_ctx_params.lora_apply_mode = lora_apply_mode;
|
||||||
enable_mmap,
|
sd_ctx_params.enable_mmap = enable_mmap;
|
||||||
clip_on_cpu,
|
sd_ctx_params.flash_attn = flash_attn;
|
||||||
control_net_cpu,
|
sd_ctx_params.diffusion_flash_attn = diffusion_flash_attn;
|
||||||
vae_on_cpu,
|
sd_ctx_params.tae_preview_only = taesd_preview;
|
||||||
flash_attn,
|
sd_ctx_params.diffusion_conv_direct = diffusion_conv_direct;
|
||||||
diffusion_flash_attn,
|
sd_ctx_params.vae_conv_direct = vae_conv_direct;
|
||||||
taesd_preview,
|
sd_ctx_params.circular_x = circular || circular_x;
|
||||||
diffusion_conv_direct,
|
sd_ctx_params.circular_y = circular || circular_y;
|
||||||
vae_conv_direct,
|
sd_ctx_params.force_sdxl_vae_conv_scale = force_sdxl_vae_conv_scale;
|
||||||
circular || circular_x,
|
sd_ctx_params.chroma_use_dit_mask = chroma_use_dit_mask;
|
||||||
circular || circular_y,
|
sd_ctx_params.chroma_use_t5_mask = chroma_use_t5_mask;
|
||||||
force_sdxl_vae_conv_scale,
|
sd_ctx_params.chroma_t5_mask_pad = chroma_t5_mask_pad;
|
||||||
chroma_use_dit_mask,
|
sd_ctx_params.qwen_image_zero_cond_t = qwen_image_zero_cond_t;
|
||||||
chroma_use_t5_mask,
|
sd_ctx_params.vae_format = str_to_vae_format(vae_format);
|
||||||
chroma_t5_mask_pad,
|
sd_ctx_params.max_vram = max_vram;
|
||||||
qwen_image_zero_cond_t,
|
sd_ctx_params.stream_layers = stream_layers;
|
||||||
str_to_vae_format(vae_format),
|
sd_ctx_params.backend = effective_backend.c_str();
|
||||||
max_vram,
|
sd_ctx_params.params_backend = effective_params_backend.c_str();
|
||||||
stream_layers,
|
|
||||||
backend.c_str(),
|
|
||||||
params_backend.c_str(),
|
|
||||||
};
|
|
||||||
return sd_ctx_params;
|
return sd_ctx_params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -148,6 +148,8 @@ struct SDContextParams {
|
|||||||
bool stream_layers = false;
|
bool stream_layers = false;
|
||||||
std::string backend;
|
std::string backend;
|
||||||
std::string params_backend;
|
std::string params_backend;
|
||||||
|
std::string effective_backend;
|
||||||
|
std::string effective_params_backend;
|
||||||
bool enable_mmap = false;
|
bool enable_mmap = false;
|
||||||
bool control_net_cpu = false;
|
bool control_net_cpu = false;
|
||||||
bool clip_on_cpu = false;
|
bool clip_on_cpu = false;
|
||||||
@ -175,6 +177,7 @@ struct SDContextParams {
|
|||||||
float flow_shift = INFINITY;
|
float flow_shift = INFINITY;
|
||||||
ArgOptions get_options();
|
ArgOptions get_options();
|
||||||
void build_embedding_map();
|
void build_embedding_map();
|
||||||
|
void prepare_backend_assignments();
|
||||||
bool resolve(SDMode mode);
|
bool resolve(SDMode mode);
|
||||||
bool validate(SDMode mode);
|
bool validate(SDMode mode);
|
||||||
bool resolve_and_validate(SDMode mode);
|
bool resolve_and_validate(SDMode mode);
|
||||||
|
|||||||
@ -165,9 +165,9 @@ Context Options:
|
|||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
||||||
when needed
|
when needed
|
||||||
--mmap whether to memory-map model
|
--mmap whether to memory-map model
|
||||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
--control-net-cpu deprecated; use --backend controlnet=cpu
|
||||||
--clip-on-cpu keep clip in cpu (for low vram)
|
--clip-on-cpu deprecated; use --backend te=cpu
|
||||||
--vae-on-cpu keep vae in cpu (for low vram)
|
--vae-on-cpu deprecated; use --backend vae=cpu
|
||||||
--fa use flash attention
|
--fa use flash attention
|
||||||
--diffusion-fa use flash attention in the diffusion model only
|
--diffusion-fa use flash attention in the diffusion model only
|
||||||
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
||||||
|
|||||||
@ -202,11 +202,7 @@ typedef struct {
|
|||||||
enum rng_type_t sampler_rng_type;
|
enum rng_type_t sampler_rng_type;
|
||||||
enum prediction_t prediction;
|
enum prediction_t prediction;
|
||||||
enum lora_apply_mode_t lora_apply_mode;
|
enum lora_apply_mode_t lora_apply_mode;
|
||||||
bool offload_params_to_cpu;
|
|
||||||
bool enable_mmap;
|
bool enable_mmap;
|
||||||
bool keep_clip_on_cpu;
|
|
||||||
bool keep_control_net_on_cpu;
|
|
||||||
bool keep_vae_on_cpu;
|
|
||||||
bool flash_attn;
|
bool flash_attn;
|
||||||
bool diffusion_flash_attn;
|
bool diffusion_flash_attn;
|
||||||
bool tae_preview_only;
|
bool tae_preview_only;
|
||||||
@ -458,7 +454,6 @@ SD_API bool generate_video(sd_ctx_t* sd_ctx,
|
|||||||
typedef struct upscaler_ctx_t upscaler_ctx_t;
|
typedef struct upscaler_ctx_t upscaler_ctx_t;
|
||||||
|
|
||||||
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
||||||
bool offload_params_to_cpu,
|
|
||||||
bool direct,
|
bool direct,
|
||||||
int n_threads,
|
int n_threads,
|
||||||
int tile_size,
|
int tile_size,
|
||||||
|
|||||||
@ -545,9 +545,6 @@ bool SDBackendManager::runtime_backend_supports_host_buffer(SDBackendModule modu
|
|||||||
|
|
||||||
bool SDBackendManager::init(const char* backend_spec,
|
bool SDBackendManager::init(const char* backend_spec,
|
||||||
const char* params_backend_spec,
|
const char* params_backend_spec,
|
||||||
bool keep_clip_on_cpu,
|
|
||||||
bool keep_vae_on_cpu,
|
|
||||||
bool keep_control_net_on_cpu,
|
|
||||||
std::string* error) {
|
std::string* error) {
|
||||||
reset();
|
reset();
|
||||||
|
|
||||||
@ -558,18 +555,6 @@ bool SDBackendManager::init(const char* backend_spec,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_assignment_.empty()) {
|
|
||||||
if (keep_clip_on_cpu) {
|
|
||||||
runtime_assignment_.set_module(SDBackendModule::TE, "cpu");
|
|
||||||
}
|
|
||||||
if (keep_vae_on_cpu) {
|
|
||||||
runtime_assignment_.set_module(SDBackendModule::VAE, "cpu");
|
|
||||||
}
|
|
||||||
if (keep_control_net_on_cpu) {
|
|
||||||
runtime_assignment_.set_module(SDBackendModule::CONTROL_NET, "cpu");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return validate(error);
|
return validate(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -51,9 +51,6 @@ public:
|
|||||||
|
|
||||||
bool init(const char* backend_spec,
|
bool init(const char* backend_spec,
|
||||||
const char* params_backend_spec,
|
const char* params_backend_spec,
|
||||||
bool keep_clip_on_cpu,
|
|
||||||
bool keep_vae_on_cpu,
|
|
||||||
bool keep_control_net_on_cpu,
|
|
||||||
std::string* error);
|
std::string* error);
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
|
|||||||
@ -187,7 +187,6 @@ public:
|
|||||||
|
|
||||||
std::string taesd_path;
|
std::string taesd_path;
|
||||||
sd_tiling_params_t vae_tiling_params = {false, false, 0, 0, 0.5f, 0, 0, nullptr};
|
sd_tiling_params_t vae_tiling_params = {false, false, 0, 0, 0.5f, 0, 0, nullptr};
|
||||||
bool offload_params_to_cpu = false;
|
|
||||||
bool enable_mmap = false;
|
bool enable_mmap = false;
|
||||||
float max_vram = 0.f;
|
float max_vram = 0.f;
|
||||||
bool stream_layers = false;
|
bool stream_layers = false;
|
||||||
@ -250,13 +249,10 @@ public:
|
|||||||
params_mem_size);
|
params_mem_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool init_backend(const sd_ctx_params_t* sd_ctx_params) {
|
bool init_backend() {
|
||||||
std::string error;
|
std::string error;
|
||||||
if (!backend_manager.init(sd_ctx_params->backend,
|
if (!backend_manager.init(backend_spec.c_str(),
|
||||||
params_backend_spec.c_str(),
|
params_backend_spec.c_str(),
|
||||||
sd_ctx_params->keep_clip_on_cpu,
|
|
||||||
sd_ctx_params->keep_vae_on_cpu,
|
|
||||||
sd_ctx_params->keep_control_net_on_cpu,
|
|
||||||
&error)) {
|
&error)) {
|
||||||
LOG_ERROR("backend config failed: %s", error.c_str());
|
LOG_ERROR("backend config failed: %s", error.c_str());
|
||||||
return false;
|
return false;
|
||||||
@ -317,15 +313,11 @@ public:
|
|||||||
|
|
||||||
bool init(const sd_ctx_params_t* sd_ctx_params) {
|
bool init(const sd_ctx_params_t* sd_ctx_params) {
|
||||||
n_threads = sd_ctx_params->n_threads;
|
n_threads = sd_ctx_params->n_threads;
|
||||||
offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu;
|
|
||||||
enable_mmap = sd_ctx_params->enable_mmap;
|
enable_mmap = sd_ctx_params->enable_mmap;
|
||||||
max_vram = sd_ctx_params->max_vram;
|
max_vram = sd_ctx_params->max_vram;
|
||||||
stream_layers = sd_ctx_params->stream_layers;
|
stream_layers = sd_ctx_params->stream_layers;
|
||||||
backend_spec = SAFE_STR(sd_ctx_params->backend);
|
backend_spec = SAFE_STR(sd_ctx_params->backend);
|
||||||
params_backend_spec = SAFE_STR(sd_ctx_params->params_backend);
|
params_backend_spec = SAFE_STR(sd_ctx_params->params_backend);
|
||||||
if (offload_params_to_cpu) {
|
|
||||||
params_backend_spec = params_backend_spec.empty() ? "*=cpu" : "*=cpu," + params_backend_spec;
|
|
||||||
}
|
|
||||||
if (stream_layers && max_vram == 0.f) {
|
if (stream_layers && max_vram == 0.f) {
|
||||||
LOG_WARN("--stream-layers has no effect without --max-vram set; ignoring");
|
LOG_WARN("--stream-layers has no effect without --max-vram set; ignoring");
|
||||||
stream_layers = false;
|
stream_layers = false;
|
||||||
@ -344,7 +336,7 @@ public:
|
|||||||
|
|
||||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||||
|
|
||||||
if (!init_backend(sd_ctx_params)) {
|
if (!init_backend()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (stream_layers && !backend_manager.params_backend_is_cpu(SDBackendModule::DIFFUSION)) {
|
if (stream_layers && !backend_manager.params_backend_is_cpu(SDBackendModule::DIFFUSION)) {
|
||||||
@ -534,8 +526,8 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Avoid full-model LoRA merge buffers on constrained setups.
|
// Avoid full-model LoRA merge buffers on constrained setups.
|
||||||
const bool streaming_constrained = stream_layers ||
|
const bool params_offloaded = params_backend_for(SDBackendModule::DIFFUSION) != backend_for(SDBackendModule::DIFFUSION);
|
||||||
sd_ctx_params->offload_params_to_cpu;
|
const bool streaming_constrained = stream_layers || params_offloaded;
|
||||||
if (have_quantized_weight || streaming_constrained) {
|
if (have_quantized_weight || streaming_constrained) {
|
||||||
apply_lora_immediately = false;
|
apply_lora_immediately = false;
|
||||||
} else {
|
} else {
|
||||||
@ -2622,13 +2614,9 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
|
|||||||
sd_ctx_params->sampler_rng_type = RNG_TYPE_COUNT;
|
sd_ctx_params->sampler_rng_type = RNG_TYPE_COUNT;
|
||||||
sd_ctx_params->prediction = PREDICTION_COUNT;
|
sd_ctx_params->prediction = PREDICTION_COUNT;
|
||||||
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
|
sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO;
|
||||||
sd_ctx_params->offload_params_to_cpu = false;
|
|
||||||
sd_ctx_params->max_vram = 0.f;
|
sd_ctx_params->max_vram = 0.f;
|
||||||
sd_ctx_params->stream_layers = false;
|
sd_ctx_params->stream_layers = false;
|
||||||
sd_ctx_params->enable_mmap = false;
|
sd_ctx_params->enable_mmap = false;
|
||||||
sd_ctx_params->keep_clip_on_cpu = false;
|
|
||||||
sd_ctx_params->keep_control_net_on_cpu = false;
|
|
||||||
sd_ctx_params->keep_vae_on_cpu = false;
|
|
||||||
sd_ctx_params->diffusion_flash_attn = false;
|
sd_ctx_params->diffusion_flash_attn = false;
|
||||||
sd_ctx_params->circular_x = false;
|
sd_ctx_params->circular_x = false;
|
||||||
sd_ctx_params->circular_y = false;
|
sd_ctx_params->circular_y = false;
|
||||||
@ -2669,14 +2657,10 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
|||||||
"rng_type: %s\n"
|
"rng_type: %s\n"
|
||||||
"sampler_rng_type: %s\n"
|
"sampler_rng_type: %s\n"
|
||||||
"prediction: %s\n"
|
"prediction: %s\n"
|
||||||
"offload_params_to_cpu: %s\n"
|
|
||||||
"max_vram: %.3f\n"
|
"max_vram: %.3f\n"
|
||||||
"stream_layers: %s\n"
|
"stream_layers: %s\n"
|
||||||
"backend: %s\n"
|
"backend: %s\n"
|
||||||
"params_backend: %s\n"
|
"params_backend: %s\n"
|
||||||
"keep_clip_on_cpu: %s\n"
|
|
||||||
"keep_control_net_on_cpu: %s\n"
|
|
||||||
"keep_vae_on_cpu: %s\n"
|
|
||||||
"flash_attn: %s\n"
|
"flash_attn: %s\n"
|
||||||
"diffusion_flash_attn: %s\n"
|
"diffusion_flash_attn: %s\n"
|
||||||
"circular_x: %s\n"
|
"circular_x: %s\n"
|
||||||
@ -2707,14 +2691,10 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
|
|||||||
sd_rng_type_name(sd_ctx_params->rng_type),
|
sd_rng_type_name(sd_ctx_params->rng_type),
|
||||||
sd_rng_type_name(sd_ctx_params->sampler_rng_type),
|
sd_rng_type_name(sd_ctx_params->sampler_rng_type),
|
||||||
sd_prediction_name(sd_ctx_params->prediction),
|
sd_prediction_name(sd_ctx_params->prediction),
|
||||||
BOOL_STR(sd_ctx_params->offload_params_to_cpu),
|
|
||||||
sd_ctx_params->max_vram,
|
sd_ctx_params->max_vram,
|
||||||
BOOL_STR(sd_ctx_params->stream_layers),
|
BOOL_STR(sd_ctx_params->stream_layers),
|
||||||
SAFE_STR(sd_ctx_params->backend),
|
SAFE_STR(sd_ctx_params->backend),
|
||||||
SAFE_STR(sd_ctx_params->params_backend),
|
SAFE_STR(sd_ctx_params->params_backend),
|
||||||
BOOL_STR(sd_ctx_params->keep_clip_on_cpu),
|
|
||||||
BOOL_STR(sd_ctx_params->keep_control_net_on_cpu),
|
|
||||||
BOOL_STR(sd_ctx_params->keep_vae_on_cpu),
|
|
||||||
BOOL_STR(sd_ctx_params->flash_attn),
|
BOOL_STR(sd_ctx_params->flash_attn),
|
||||||
BOOL_STR(sd_ctx_params->diffusion_flash_attn),
|
BOOL_STR(sd_ctx_params->diffusion_flash_attn),
|
||||||
BOOL_STR(sd_ctx_params->circular_x),
|
BOOL_STR(sd_ctx_params->circular_x),
|
||||||
@ -4436,7 +4416,6 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
|
|||||||
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
|
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
|
||||||
hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
||||||
if (!hires_upscaler->load_from_file(request.hires.model_path,
|
if (!hires_upscaler->load_from_file(request.hires.model_path,
|
||||||
sd_ctx->sd->offload_params_to_cpu,
|
|
||||||
sd_ctx->sd->n_threads)) {
|
sd_ctx->sd->n_threads)) {
|
||||||
LOG_ERROR("load hires model upscaler failed");
|
LOG_ERROR("load hires model upscaler failed");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|||||||
@ -39,20 +39,12 @@ void UpscalerGGML::set_stream_layers_enabled(bool enabled) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
bool UpscalerGGML::load_from_file(const std::string& esrgan_path,
|
||||||
bool offload_params_to_cpu,
|
|
||||||
int n_threads) {
|
int n_threads) {
|
||||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||||
|
|
||||||
std::string effective_params_backend_spec = params_backend_spec;
|
|
||||||
if (offload_params_to_cpu) {
|
|
||||||
effective_params_backend_spec = effective_params_backend_spec.empty() ? "*=cpu" : "*=cpu," + effective_params_backend_spec;
|
|
||||||
}
|
|
||||||
std::string error;
|
std::string error;
|
||||||
if (!backend_manager.init(backend_spec.c_str(),
|
if (!backend_manager.init(backend_spec.c_str(),
|
||||||
effective_params_backend_spec.c_str(),
|
params_backend_spec.c_str(),
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
&error)) {
|
&error)) {
|
||||||
LOG_ERROR("upscaler backend config failed: %s", error.c_str());
|
LOG_ERROR("upscaler backend config failed: %s", error.c_str());
|
||||||
return false;
|
return false;
|
||||||
@ -181,7 +173,6 @@ struct upscaler_ctx_t {
|
|||||||
};
|
};
|
||||||
|
|
||||||
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
||||||
bool offload_params_to_cpu,
|
|
||||||
bool direct,
|
bool direct,
|
||||||
int n_threads,
|
int n_threads,
|
||||||
int tile_size,
|
int tile_size,
|
||||||
@ -198,7 +189,7 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads)) {
|
if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, n_threads)) {
|
||||||
delete upscaler_ctx->upscaler;
|
delete upscaler_ctx->upscaler;
|
||||||
upscaler_ctx->upscaler = nullptr;
|
upscaler_ctx->upscaler = nullptr;
|
||||||
free(upscaler_ctx);
|
free(upscaler_ctx);
|
||||||
|
|||||||
@ -32,7 +32,6 @@ struct UpscalerGGML {
|
|||||||
~UpscalerGGML();
|
~UpscalerGGML();
|
||||||
|
|
||||||
bool load_from_file(const std::string& esrgan_path,
|
bool load_from_file(const std::string& esrgan_path,
|
||||||
bool offload_params_to_cpu,
|
|
||||||
int n_threads);
|
int n_threads);
|
||||||
void set_max_graph_vram_bytes(size_t max_vram_bytes);
|
void set_max_graph_vram_bytes(size_t max_vram_bytes);
|
||||||
void set_stream_layers_enabled(bool enabled);
|
void set_stream_layers_enabled(bool enabled);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user