Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2026-06-09 15:56:39 +00:00)
perf: ratchet streaming budget so plan stops re-merging every step (#1611)
This commit is contained in:
parent 74f513d512
commit 0648f4426b
@ -1708,6 +1708,7 @@ protected:
|
||||
|
||||
size_t max_graph_vram_bytes = 0;
|
||||
bool stream_layers_enabled = false;
|
||||
size_t observed_max_effective_budget_ = 0;
|
||||
|
||||
sd::layer_registry::LayerRegistry layer_registry_;
|
||||
|
||||
@ -2446,7 +2447,7 @@ protected:
|
||||
constexpr size_t safety_margin = 512ull * 1024 * 1024;
|
||||
size_t free_clamp = (free_vram > safety_margin) ? (free_vram - safety_margin) : 0;
|
||||
if (free_clamp < effective_budget) {
|
||||
- LOG_INFO("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB",
|
||||
+ LOG_DEBUG("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB",
|
||||
get_desc().c_str(),
|
||||
free_clamp / (1024.0 * 1024.0),
|
||||
effective_budget / (1024.0 * 1024.0));
|
||||
@ -2455,6 +2456,16 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
bool budget_increased = false;
|
||||
if (stream_layers_enabled) {
|
||||
if (effective_budget > observed_max_effective_budget_) {
|
||||
observed_max_effective_budget_ = effective_budget;
|
||||
budget_increased = true;
|
||||
} else {
|
||||
effective_budget = observed_max_effective_budget_;
|
||||
}
|
||||
}
|
||||
|
||||
if (effective_budget_out != nullptr) {
|
||||
*effective_budget_out = effective_budget;
|
||||
}
|
||||
@ -2466,9 +2477,15 @@ protected:
|
||||
params_tensor_set_,
|
||||
get_desc().c_str());
|
||||
if (stream_layers_enabled) {
|
||||
if (budget_increased) {
|
||||
LOG_INFO("%s streaming budget = %.2f MB",
|
||||
get_desc().c_str(),
|
||||
effective_budget / (1024.0 * 1024.0));
|
||||
} else {
|
||||
LOG_DEBUG("%s streaming budget = %.2f MB",
|
||||
get_desc().c_str(),
|
||||
effective_budget / (1024.0 * 1024.0));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -3053,6 +3070,7 @@ public:
|
||||
ggml_backend_buffer_free(params_buffer);
|
||||
params_buffer = nullptr;
|
||||
}
|
||||
observed_max_effective_budget_ = 0;
|
||||
}
|
||||
|
||||
size_t get_params_buffer_size() {
|
||||
|
||||
@ -699,7 +699,7 @@ namespace sd::ggml_graph_cut {
|
||||
}
|
||||
|
||||
if (log_desc != nullptr) {
|
||||
- LOG_INFO("%s graph cut max_vram budget merge took %lld ms",
|
||||
+ LOG_DEBUG("%s graph cut max_vram budget merge took %lld ms",
|
||||
log_desc,
|
||||
ggml_time_ms() - t_budget_begin);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user