perf: ratchet streaming budget so plan stops re-merging every step (#1611)

This commit is contained in:
fszontagh 2026-06-06 10:32:03 +02:00 committed by GitHub
parent 74f513d512
commit 0648f4426b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 12 deletions

View File

@@ -1706,8 +1706,9 @@ protected:
std::unordered_set<ggml_tensor*> resident_param_set;
uint64_t resident_state_token = 0;
size_t max_graph_vram_bytes = 0;
bool stream_layers_enabled = false;
size_t max_graph_vram_bytes = 0;
bool stream_layers_enabled = false;
size_t observed_max_effective_budget_ = 0;
sd::layer_registry::LayerRegistry layer_registry_;
@@ -2446,15 +2447,25 @@ protected:
constexpr size_t safety_margin = 512ull * 1024 * 1024;
size_t free_clamp = (free_vram > safety_margin) ? (free_vram - safety_margin) : 0;
if (free_clamp < effective_budget) {
LOG_INFO("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB",
get_desc().c_str(),
free_clamp / (1024.0 * 1024.0),
effective_budget / (1024.0 * 1024.0));
LOG_DEBUG("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB",
get_desc().c_str(),
free_clamp / (1024.0 * 1024.0),
effective_budget / (1024.0 * 1024.0));
effective_budget = free_clamp;
}
}
}
bool budget_increased = false;
if (stream_layers_enabled) {
if (effective_budget > observed_max_effective_budget_) {
observed_max_effective_budget_ = effective_budget;
budget_increased = true;
} else {
effective_budget = observed_max_effective_budget_;
}
}
if (effective_budget_out != nullptr) {
*effective_budget_out = effective_budget;
}
@@ -2466,9 +2477,15 @@ protected:
params_tensor_set_,
get_desc().c_str());
if (stream_layers_enabled) {
LOG_INFO("%s streaming budget = %.2f MB",
get_desc().c_str(),
effective_budget / (1024.0 * 1024.0));
if (budget_increased) {
LOG_INFO("%s streaming budget = %.2f MB",
get_desc().c_str(),
effective_budget / (1024.0 * 1024.0));
} else {
LOG_DEBUG("%s streaming budget = %.2f MB",
get_desc().c_str(),
effective_budget / (1024.0 * 1024.0));
}
}
return true;
}
@@ -3053,6 +3070,7 @@ public:
ggml_backend_buffer_free(params_buffer);
params_buffer = nullptr;
}
observed_max_effective_budget_ = 0;
}
size_t get_params_buffer_size() {

View File

@@ -699,9 +699,9 @@ namespace sd::ggml_graph_cut {
}
if (log_desc != nullptr) {
LOG_INFO("%s graph cut max_vram budget merge took %lld ms",
log_desc,
ggml_time_ms() - t_budget_begin);
LOG_DEBUG("%s graph cut max_vram budget merge took %lld ms",
log_desc,
ggml_time_ms() - t_budget_begin);
}
return merged_plan;