From bf0216765ad49b17ade1526a4b704397b5ec8470 Mon Sep 17 00:00:00 2001 From: leejet Date: Tue, 31 Mar 2026 23:06:44 +0800 Subject: [PATCH] feat: show tensor loading progress in MB/s or GB/s (#1380) --- src/model.cpp | 16 +++++++++++--- src/util.cpp | 61 +++++++++++++++++++++++++++++++++++++++++---------- src/util.h | 1 + 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/model.cpp b/src/model.cpp index 2c708ed..1ccb03c 100644 --- a/src/model.cpp +++ b/src/model.cpp @@ -1311,6 +1311,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread std::atomic memcpy_time_ms(0); std::atomic copy_to_backend_time_ms(0); std::atomic convert_time_ms(0); + std::atomic bytes_processed(0); int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores(); LOG_DEBUG("using %d threads for model loading", num_threads_to_use); @@ -1522,6 +1523,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread t1 = ggml_time_ms(); copy_to_backend_time_ms.fetch_add(t1 - t0); } + + bytes_processed.fetch_add((uint64_t)nbytes_to_read); } if (zip != nullptr) { zip_close(zip); @@ -1534,8 +1537,12 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread if (current_idx >= file_tensors.size() || failed) { break; } - size_t curr_num = total_tensors_processed + current_idx; - pretty_progress(static_cast(curr_num), static_cast(total_tensors_to_process), (ggml_time_ms() - t_start) / 1000.0f / (curr_num + 1e-6f)); + size_t curr_num = total_tensors_processed + current_idx; + float elapsed_seconds = (ggml_time_ms() - t_start) / 1000.0f; + pretty_bytes_progress(static_cast(curr_num), + static_cast(total_tensors_to_process), + bytes_processed.load(), + elapsed_seconds); std::this_thread::sleep_for(std::chrono::milliseconds(200)); } @@ -1548,7 +1555,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread break; } total_tensors_processed += file_tensors.size(); - pretty_progress(static_cast(total_tensors_processed), static_cast(total_tensors_to_process), (ggml_time_ms() - t_start) / 1000.0f / (total_tensors_processed + 1e-6f)); + pretty_bytes_progress(static_cast(total_tensors_processed), + static_cast(total_tensors_to_process), + bytes_processed.load(), + (ggml_time_ms() - t_start) / 1000.0f); if (total_tensors_processed < total_tensors_to_process) { printf("\n"); } diff --git a/src/util.cpp b/src/util.cpp index 2d330a2..e018762 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -337,17 +337,13 @@ std::vector split_string(const std::string& str, char delimiter) { return result; } -void pretty_progress(int step, int steps, float time) { - if (sd_progress_cb) { - sd_progress_cb(step, steps, time, sd_progress_cb_data); - return; - } - if (step == 0) { - return; - } +static std::string build_progress_bar(int step, int steps) { std::string progress = " |"; int max_progress = 50; - int32_t current = (int32_t)(step * 1.f * max_progress / steps); + int32_t current = 0; + if (steps > 0) { + current = (int32_t)(step * 1.f * max_progress / steps); + } for (int i = 0; i < 50; i++) { if (i > current) { progress += " "; @@ -358,16 +354,57 @@ void pretty_progress(int step, int steps, float time) { } } progress += "|"; + return progress; +} - const char* lf = (step == steps ? "\n" : ""); +static void print_progress_line(int step, int steps, const std::string& speed_text) { + if (step == 0) { + return; + } + std::string progress = build_progress_bar(step, steps); + const char* lf = (step == steps ? "\n" : ""); + printf("\r%s %i/%i - %s\033[K%s", progress.c_str(), step, steps, speed_text.c_str(), lf); + fflush(stdout); // for linux +} + +void pretty_progress(int step, int steps, float time) { + if (sd_progress_cb) { + sd_progress_cb(step, steps, time, sd_progress_cb_data); + return; + } + if (step == 0) { + return; + } const char* unit = "s/it"; float speed = time; if (speed < 1.0f && speed > 0.f) { speed = 1.0f / speed; unit = "it/s"; } - printf("\r%s %i/%i - %.2f%s\033[K%s", progress.c_str(), step, steps, speed, unit, lf); - fflush(stdout); // for linux + print_progress_line(step, steps, sd_format("%.2f%s", speed, unit)); +} + +void pretty_bytes_progress(int step, int steps, uint64_t bytes_processed, float elapsed_seconds) { + if (sd_progress_cb) { + float time = elapsed_seconds / (step + 1e-6f); + sd_progress_cb(step, steps, time, sd_progress_cb_data); + return; + } + if (step == 0) { + return; + } + + double bytes_per_second = 0.0; + if (elapsed_seconds > 0.0f) { + bytes_per_second = bytes_processed / (double)elapsed_seconds; + } + + double speed_mb = bytes_per_second / (1024.0 * 1024.0); + if (speed_mb >= 1024.0) { + print_progress_line(step, steps, sd_format("%.2fGB/s", speed_mb / 1024.0)); + } else { + print_progress_line(step, steps, sd_format("%.2fMB/s", speed_mb)); + } } std::string ltrim(const std::string& s) { diff --git a/src/util.h b/src/util.h index 24ce4cf..2468cb9 100644 --- a/src/util.h +++ b/src/util.h @@ -64,6 +64,7 @@ protected: std::string path_join(const std::string& p1, const std::string& p2); std::vector split_string(const std::string& str, char delimiter); void pretty_progress(int step, int steps, float time); +void pretty_bytes_progress(int step, int steps, uint64_t bytes_processed, float elapsed_seconds); void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);