mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-09 15:56:39 +00:00
perf: cap planner budget when model dwarfs the streaming budget (#1612)
This commit is contained in:
parent
b3d56d0ba1
commit
17a2b4a315
@ -2470,10 +2470,26 @@ protected:
|
||||
*effective_budget_out = effective_budget;
|
||||
}
|
||||
|
||||
// When streaming and the params exceed 3/4 of the budget, cap the planner at
|
||||
// a quarter of it so it builds smaller merged segments and chunk-K can fit
|
||||
// alongside. Without streaming the cap only adds dispatch overhead.
|
||||
size_t planner_budget = effective_budget;
|
||||
if (stream_layers_enabled) {
|
||||
size_t total_params_bytes = 0;
|
||||
for (const ggml_tensor* t : params_tensor_set_) {
|
||||
if (t != nullptr) {
|
||||
total_params_bytes += ggml_nbytes(t);
|
||||
}
|
||||
}
|
||||
if (total_params_bytes * 4 > effective_budget * 3) {
|
||||
planner_budget = effective_budget / 4;
|
||||
}
|
||||
}
|
||||
|
||||
*plan_out = sd::ggml_graph_cut::resolve_plan(runtime_backend,
|
||||
gf,
|
||||
&graph_cut_plan_cache_,
|
||||
effective_budget,
|
||||
planner_budget,
|
||||
params_tensor_set_,
|
||||
get_desc().c_str());
|
||||
if (stream_layers_enabled) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user