feat: throttle model loading progress updates (#782)

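Some terminals render output slowly, so printing a progress update for every tensor during model loading can slow down the load itself; progress updates are now throttled to at most one every 200 ms. Also, because per-tensor loading times can vary a lot, the progress display now shows the average time per tensor across all tensors loaded so far instead of just the time taken by the last one.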
2026-02-04 19:03:35 +00:00 · 2025-09-01 10:32:01 -03:00 · 2025-09-01 10:32:01 -03:00 · eea77cbad9
commit eea77cbad9
parent 0e86d90ee4
1 changed file with 22 additions and 11 deletions
--- a/model.cpp
+++ b/model.cpp
@@ -1942,8 +1942,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
            return true;
        };
        int tensor_count = 0;
-        int64_t t1       = ggml_time_ms();
+        int64_t t0       = ggml_time_ms();
-        bool partial     = false;
+        int64_t t1       = t0;
        bool partial     = true;
        int tensor_max   = (int)processed_tensor_storages.size();
        pretty_progress(0, tensor_max, 0.0f);
        for (auto& tensor_storage : processed_tensor_storages) {
            if (tensor_storage.file_index != file_index) {
                ++tensor_count;
@@ -2046,21 +2049,29 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                    ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
                }
            }
-            size_t tensor_max = processed_tensor_storages.size();
+            ++tensor_count;
-            int64_t t2        = ggml_time_ms();
+            int64_t t2 = ggml_time_ms();
-            pretty_progress(++tensor_count, tensor_max, (t2 - t1) / 1000.0f);
+            if ((t2 - t1) >= 200) {
-            t1      = t2;
+                t1 = t2;
-            partial = tensor_count != tensor_max;
+                pretty_progress(tensor_count, tensor_max, (t1 - t0) / (1000.0f * tensor_count));
                partial = tensor_count != tensor_max;
            }
        }
        if (partial) {
            if (tensor_count >= 1) {
                t1 = ggml_time_ms();
                pretty_progress(tensor_count, tensor_max, (t1 - t0) / (1000.0f * tensor_count));
            }
            if (tensor_count < tensor_max) {
                printf("\n");
            }
        }
        if (zip != NULL) {
            zip_close(zip);
        }
        if (partial) {
            printf("\n");
        }
        if (!success) {
            break;
        }