mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-09 15:56:39 +00:00
feat: make negative max_vram control the amount of spare vram (#1503)
This commit is contained in:
parent
21fd4e6788
commit
f683c88a28
@ -55,7 +55,8 @@ Context Options:
|
||||
then threads will be set to the number of CPU physical cores
|
||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
|
||||
graph splitting; -1 auto-detects free VRAM minus 1 GiB
|
||||
graph splitting; a negative value auto-detects free VRAM, sparing the
|
||||
specified value (e.g. -0.5 will keep at least 0.5 GiB free)
|
||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
||||
when needed
|
||||
|
||||
@ -413,7 +413,7 @@ ArgOptions SDContextParams::get_options() {
|
||||
options.float_options = {
|
||||
{"",
|
||||
"--max-vram",
|
||||
"maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables graph splitting; -1 auto-detects free VRAM minus 1 GiB",
|
||||
"maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables graph splitting; a negative value auto-detects free VRAM, sparing the specified value (e.g. -0.5 will keep at least 0.5 GiB free)",
|
||||
&max_vram},
|
||||
};
|
||||
|
||||
|
||||
@ -157,7 +157,8 @@ Context Options:
|
||||
then threads will be set to the number of CPU physical cores
|
||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
|
||||
graph splitting; -1 auto-detects free VRAM minus 1 GiB
|
||||
graph splitting; a negative value auto-detects free VRAM, sparing the
|
||||
specified value (e.g. -0.5 will keep at least 0.5 GiB free)
|
||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
||||
when needed
|
||||
|
||||
@ -17,7 +17,6 @@
|
||||
namespace sd::ggml_graph_cut {
|
||||
|
||||
static constexpr double MAX_VRAM_BYTES_PER_GIB = 1024.0 * 1024.0 * 1024.0;
|
||||
static constexpr size_t MAX_VRAM_AUTO_RESERVE_BYTES = 1024ULL * 1024ULL * 1024ULL;
|
||||
|
||||
static std::string graph_cut_tensor_display_name(const ggml_tensor* tensor) {
|
||||
if (tensor == nullptr) {
|
||||
@ -93,45 +92,47 @@ namespace sd::ggml_graph_cut {
|
||||
return static_cast<float>(static_cast<double>(max_vram_bytes) / MAX_VRAM_BYTES_PER_GIB);
|
||||
}
|
||||
|
||||
static size_t resolve_auto_max_vram_bytes(ggml_backend_t backend) {
|
||||
static size_t resolve_auto_max_vram_bytes(float spare_vram, ggml_backend_t backend) {
|
||||
if (backend == nullptr) {
|
||||
LOG_WARN("--max-vram -1 requested, but no backend is available; disabling graph splitting");
|
||||
LOG_WARN("--max-vram < 0 requested, but no backend is available; disabling graph splitting");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
|
||||
if (dev == nullptr) {
|
||||
LOG_WARN("--max-vram -1 requested, but no backend device is available; disabling graph splitting");
|
||||
LOG_WARN("--max-vram < 0 requested, but no backend device is available; disabling graph splitting");
|
||||
return 0;
|
||||
}
|
||||
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
|
||||
LOG_WARN("--max-vram -1 requested, but the main backend is CPU; disabling graph splitting");
|
||||
LOG_WARN("--max-vram < 0 requested, but the main backend is CPU; disabling graph splitting");
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t free_vram = 0;
|
||||
size_t total_vram = 0;
|
||||
ggml_backend_dev_memory(dev, &free_vram, &total_vram);
|
||||
size_t spare_bytes = static_cast<size_t>(MAX_VRAM_BYTES_PER_GIB * spare_vram);
|
||||
|
||||
if (free_vram <= MAX_VRAM_AUTO_RESERVE_BYTES) {
|
||||
LOG_WARN("--max-vram -1 requested, but free VRAM is %.2f GiB; reserving 1.00 GiB leaves no graph budget",
|
||||
free_vram / MAX_VRAM_BYTES_PER_GIB);
|
||||
if (free_vram <= spare_bytes) {
|
||||
LOG_WARN("--max-vram < 0 requested, but free VRAM is %.2f GiB; reserving %.2f GiB leaves no graph budget",
|
||||
free_vram / MAX_VRAM_BYTES_PER_GIB, spare_vram);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const size_t max_vram_bytes = free_vram - MAX_VRAM_AUTO_RESERVE_BYTES;
|
||||
LOG_INFO("--max-vram -1 auto-detected %.2f GiB free VRAM (%.2f GiB total), reserving 1.00 GiB; using %.2f GiB",
|
||||
const size_t max_vram_bytes = free_vram - spare_bytes;
|
||||
LOG_INFO("--max-vram < 0 auto-detected %.2f GiB free VRAM (%.2f GiB total), reserving %.2f GiB; using %.2f GiB",
|
||||
free_vram / MAX_VRAM_BYTES_PER_GIB,
|
||||
total_vram / MAX_VRAM_BYTES_PER_GIB,
|
||||
spare_vram,
|
||||
max_vram_bytes / MAX_VRAM_BYTES_PER_GIB);
|
||||
return max_vram_bytes;
|
||||
}
|
||||
|
||||
float resolve_max_vram_gib(float max_vram, ggml_backend_t backend) {
|
||||
if (max_vram != -1.f) {
|
||||
if (max_vram >= 0.f) {
|
||||
return max_vram;
|
||||
}
|
||||
return max_vram_bytes_to_gib(resolve_auto_max_vram_bytes(backend));
|
||||
return max_vram_bytes_to_gib(resolve_auto_max_vram_bytes(-max_vram, backend));
|
||||
}
|
||||
|
||||
static Segment make_segment_seed(const Plan& plan,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user