From 0648f4426b4fcf11f326cc87030bd4727f44666e Mon Sep 17 00:00:00 2001 From: fszontagh <51741446+fszontagh@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:32:03 +0200 Subject: [PATCH] perf: ratchet streaming budget so plan stops re-merging every step (#1611) --- src/ggml_extend.hpp | 36 +++++++++++++++++++++++++++--------- src/ggml_graph_cut.cpp | 6 +++--- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/ggml_extend.hpp b/src/ggml_extend.hpp index 9dada344..f6ac1d65 100644 --- a/src/ggml_extend.hpp +++ b/src/ggml_extend.hpp @@ -1706,8 +1706,9 @@ protected: std::unordered_set resident_param_set; uint64_t resident_state_token = 0; - size_t max_graph_vram_bytes = 0; - bool stream_layers_enabled = false; + size_t max_graph_vram_bytes = 0; + bool stream_layers_enabled = false; + size_t observed_max_effective_budget_ = 0; sd::layer_registry::LayerRegistry layer_registry_; @@ -2446,15 +2447,25 @@ protected: constexpr size_t safety_margin = 512ull * 1024 * 1024; size_t free_clamp = (free_vram > safety_margin) ? (free_vram - safety_margin) : 0; if (free_clamp < effective_budget) { - LOG_INFO("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB", - get_desc().c_str(), - free_clamp / (1024.0 * 1024.0), - effective_budget / (1024.0 * 1024.0)); + LOG_DEBUG("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB", + get_desc().c_str(), + free_clamp / (1024.0 * 1024.0), + effective_budget / (1024.0 * 1024.0)); effective_budget = free_clamp; } } } + bool budget_increased = false; + if (stream_layers_enabled) { + if (effective_budget > observed_max_effective_budget_) { + observed_max_effective_budget_ = effective_budget; + budget_increased = true; + } else { + effective_budget = observed_max_effective_budget_; + } + } + if (effective_budget_out != nullptr) { *effective_budget_out = effective_budget; } @@ -2466,9 +2477,15 @@ protected: params_tensor_set_, get_desc().c_str()); if (stream_layers_enabled) { - LOG_INFO("%s streaming budget = %.2f MB", - get_desc().c_str(), - effective_budget / (1024.0 * 1024.0)); + if (budget_increased) { + LOG_INFO("%s streaming budget = %.2f MB", + get_desc().c_str(), + effective_budget / (1024.0 * 1024.0)); + } else { + LOG_DEBUG("%s streaming budget = %.2f MB", + get_desc().c_str(), + effective_budget / (1024.0 * 1024.0)); + } } return true; } @@ -3053,6 +3070,7 @@ public: ggml_backend_buffer_free(params_buffer); params_buffer = nullptr; } + observed_max_effective_budget_ = 0; } size_t get_params_buffer_size() { diff --git a/src/ggml_graph_cut.cpp b/src/ggml_graph_cut.cpp index 61234eaf..2009eaf0 100644 --- a/src/ggml_graph_cut.cpp +++ b/src/ggml_graph_cut.cpp @@ -699,9 +699,9 @@ namespace sd::ggml_graph_cut { } if (log_desc != nullptr) { - LOG_INFO("%s graph cut max_vram budget merge took %lld ms", - log_desc, - ggml_time_ms() - t_budget_begin); + LOG_DEBUG("%s graph cut max_vram budget merge took %lld ms", + log_desc, + ggml_time_ms() - t_budget_begin); } return merged_plan;