mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-09 15:56:39 +00:00
perf: ratchet streaming budget so plan stops re-merging every step (#1611)
This commit is contained in:
parent
74f513d512
commit
0648f4426b
@@ -1706,8 +1706,9 @@ protected:
|
|||||||
std::unordered_set<ggml_tensor*> resident_param_set;
|
std::unordered_set<ggml_tensor*> resident_param_set;
|
||||||
uint64_t resident_state_token = 0;
|
uint64_t resident_state_token = 0;
|
||||||
|
|
||||||
size_t max_graph_vram_bytes = 0;
|
size_t max_graph_vram_bytes = 0;
|
||||||
bool stream_layers_enabled = false;
|
bool stream_layers_enabled = false;
|
||||||
|
size_t observed_max_effective_budget_ = 0;
|
||||||
|
|
||||||
sd::layer_registry::LayerRegistry layer_registry_;
|
sd::layer_registry::LayerRegistry layer_registry_;
|
||||||
|
|
||||||
@@ -2446,15 +2447,25 @@ protected:
|
|||||||
constexpr size_t safety_margin = 512ull * 1024 * 1024;
|
constexpr size_t safety_margin = 512ull * 1024 * 1024;
|
||||||
size_t free_clamp = (free_vram > safety_margin) ? (free_vram - safety_margin) : 0;
|
size_t free_clamp = (free_vram > safety_margin) ? (free_vram - safety_margin) : 0;
|
||||||
if (free_clamp < effective_budget) {
|
if (free_clamp < effective_budget) {
|
||||||
LOG_INFO("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB",
|
LOG_DEBUG("%s clamping streaming budget: actual free VRAM %.2f MB < user cap %.2f MB",
|
||||||
get_desc().c_str(),
|
get_desc().c_str(),
|
||||||
free_clamp / (1024.0 * 1024.0),
|
free_clamp / (1024.0 * 1024.0),
|
||||||
effective_budget / (1024.0 * 1024.0));
|
effective_budget / (1024.0 * 1024.0));
|
||||||
effective_budget = free_clamp;
|
effective_budget = free_clamp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool budget_increased = false;
|
||||||
|
if (stream_layers_enabled) {
|
||||||
|
if (effective_budget > observed_max_effective_budget_) {
|
||||||
|
observed_max_effective_budget_ = effective_budget;
|
||||||
|
budget_increased = true;
|
||||||
|
} else {
|
||||||
|
effective_budget = observed_max_effective_budget_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (effective_budget_out != nullptr) {
|
if (effective_budget_out != nullptr) {
|
||||||
*effective_budget_out = effective_budget;
|
*effective_budget_out = effective_budget;
|
||||||
}
|
}
|
||||||
@@ -2466,9 +2477,15 @@ protected:
|
|||||||
params_tensor_set_,
|
params_tensor_set_,
|
||||||
get_desc().c_str());
|
get_desc().c_str());
|
||||||
if (stream_layers_enabled) {
|
if (stream_layers_enabled) {
|
||||||
LOG_INFO("%s streaming budget = %.2f MB",
|
if (budget_increased) {
|
||||||
get_desc().c_str(),
|
LOG_INFO("%s streaming budget = %.2f MB",
|
||||||
effective_budget / (1024.0 * 1024.0));
|
get_desc().c_str(),
|
||||||
|
effective_budget / (1024.0 * 1024.0));
|
||||||
|
} else {
|
||||||
|
LOG_DEBUG("%s streaming budget = %.2f MB",
|
||||||
|
get_desc().c_str(),
|
||||||
|
effective_budget / (1024.0 * 1024.0));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -3053,6 +3070,7 @@ public:
|
|||||||
ggml_backend_buffer_free(params_buffer);
|
ggml_backend_buffer_free(params_buffer);
|
||||||
params_buffer = nullptr;
|
params_buffer = nullptr;
|
||||||
}
|
}
|
||||||
|
observed_max_effective_budget_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_params_buffer_size() {
|
size_t get_params_buffer_size() {
|
||||||
|
|||||||
@@ -699,9 +699,9 @@ namespace sd::ggml_graph_cut {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (log_desc != nullptr) {
|
if (log_desc != nullptr) {
|
||||||
LOG_INFO("%s graph cut max_vram budget merge took %lld ms",
|
LOG_DEBUG("%s graph cut max_vram budget merge took %lld ms",
|
||||||
log_desc,
|
log_desc,
|
||||||
ggml_time_ms() - t_budget_begin);
|
ggml_time_ms() - t_budget_begin);
|
||||||
}
|
}
|
||||||
|
|
||||||
return merged_plan;
|
return merged_plan;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user