mirror of https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00

feat: add detailed tensor loading time stat (#793)

parent: c587a43c99
commit: c648001030

model.cpp: 50 changed lines
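The patch instruments ModelLoader::load_tensors with five accumulators (process, read, memcpy, convert, copy_to_backend) driven by a shared millisecond stopwatch: prev_time_ms is (re)set just before each measured phase, curr_time_ms is sampled just after it, and the delta is added to that phase's counter. Below is a minimal self-contained sketch of the same pattern, not the repo's code: std::chrono stands in for ggml_time_ms(), and do_read()/do_convert() are hypothetical placeholders for the real phases.

// Minimal sketch of the stopwatch-accumulator pattern this patch applies.
#include <chrono>
#include <cstdint>
#include <cstdio>

static int64_t time_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

static void do_read() { /* placeholder: file/zip read */ }
static void do_convert() { /* placeholder: dtype conversion */ }

int main() {
    int64_t read_time_ms    = 0;
    int64_t convert_time_ms = 0;

    int64_t start_time   = time_ms();
    int64_t prev_time_ms = start_time;
    int64_t curr_time_ms = 0;

    for (int i = 0; i < 3; i++) {   // pretend we load three tensors
        prev_time_ms = time_ms();   // (re)start the stopwatch before the phase
        do_read();
        curr_time_ms = time_ms();
        read_time_ms += curr_time_ms - prev_time_ms;  // charge the delta to "read"
        prev_time_ms = curr_time_ms;                  // next phase starts where this one ended

        do_convert();
        curr_time_ms = time_ms();
        convert_time_ms += curr_time_ms - prev_time_ms;  // charge the delta to "convert"
    }

    int64_t end_time = time_ms();
    printf("loading completed, taking %.2fs (read: %.2fs, convert: %.2fs)\n",
           (end_time - start_time) / 1000.f,
           read_time_ms / 1000.f,
           convert_time_ms / 1000.f);
    return 0;
}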
@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 }
 
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
+    int64_t process_time_ms = 0;
+    int64_t read_time_ms = 0;
+    int64_t memcpy_time_ms = 0;
+    int64_t copy_to_backend_time_ms = 0;
+    int64_t convert_time_ms = 0;
+
+    int64_t prev_time_ms = 0;
+    int64_t curr_time_ms = 0;
+    int64_t start_time = ggml_time_ms();
+    prev_time_ms = start_time;
     std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
         // LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     }
     std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
     processed_tensor_storages = dedup;
+    curr_time_ms = ggml_time_ms();
+    process_time_ms = curr_time_ms - prev_time_ms;
+    prev_time_ms = curr_time_ms;
 
     bool success = true;
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                 size_t entry_size = zip_entry_size(zip);
                 if (entry_size != n) {
                     read_buffer.resize(entry_size);
+                    prev_time_ms = ggml_time_ms();
                     zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
+                    curr_time_ms = ggml_time_ms();
+                    read_time_ms += curr_time_ms - prev_time_ms;
+                    prev_time_ms = curr_time_ms;
                     memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
+                    curr_time_ms = ggml_time_ms();
+                    memcpy_time_ms += curr_time_ms - prev_time_ms;
                 } else {
+                    prev_time_ms = ggml_time_ms();
                     zip_entry_noallocread(zip, (void*)buf, n);
+                    curr_time_ms = ggml_time_ms();
+                    read_time_ms += curr_time_ms - prev_time_ms;
                 }
                 zip_entry_close(zip);
             } else {
+                prev_time_ms = ggml_time_ms();
                 file.seekg(tensor_storage.offset);
                 file.read(buf, n);
+                curr_time_ms = ggml_time_ms();
+                read_time_ms += curr_time_ms - prev_time_ms;
                 if (!file) {
                     LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
                     return false;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                 read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
             }
 
+            prev_time_ms = ggml_time_ms();
             if (tensor_storage.is_bf16) {
                 // inplace op
                 bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
             } else if (tensor_storage.is_i64) {
                 i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
             }
+            curr_time_ms = ggml_time_ms();
+            convert_time_ms += curr_time_ms - prev_time_ms;
         } else {
             read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
             read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
+            prev_time_ms = ggml_time_ms();
             if (tensor_storage.is_bf16) {
                 // inplace op
                 bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
 
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data,
                                dst_tensor->type, (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
             }
         } else {
             read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
             read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
+            prev_time_ms = ggml_time_ms();
             if (tensor_storage.is_bf16) {
                 // inplace op
                 bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
 
             if (tensor_storage.type == dst_tensor->type) {
                 // copy to device memory
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
+                curr_time_ms = ggml_time_ms();
+                copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
             } else {
                 // convert first, then copy to device memory
                 convert_buffer.resize(ggml_nbytes(dst_tensor));
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type,
                                (void*)convert_buffer.data(), dst_tensor->type,
                                (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
+                curr_time_ms = ggml_time_ms();
+                copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
             }
         }
         ++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
             break;
         }
     }
+    int64_t end_time = ggml_time_ms();
+    LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
+             (end_time - start_time) / 1000.f,
+             process_time_ms / 1000.f,
+             read_time_ms / 1000.f,
+             memcpy_time_ms / 1000.f,
+             convert_time_ms / 1000.f,
+             copy_to_backend_time_ms / 1000.f);
     return success;
 }
 
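With the counters in place, the summary printed at the end of load_tensors follows the format string above; a hypothetical run might log (numbers invented purely to illustrate the shape of the output):

loading tensors completed, taking 12.34s (process: 0.01s, read: 8.20s, memcpy: 0.40s, convert: 1.90s, copy_to_backend: 1.80s)

The commit also drops the coarse t0/t1 wall-clock timer from the model setup path in a second file (presumably stable-diffusion.cpp, given the sd_ctx_params and clip_backend context below), since that single end-to-end LOG_INFO is superseded by the detailed breakdown: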
@@ -557,8 +557,6 @@ public:
         // load weights
         LOG_DEBUG("loading weights");
 
-        int64_t t0 = ggml_time_ms();
-
         std::set<std::string> ignore_tensors;
         tensors["alphas_cumprod"] = alphas_cumprod_tensor;
         if (use_tiny_autoencoder) {
@@ -656,11 +654,7 @@ public:
                      ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
         }
-
-        int64_t t1 = ggml_time_ms();
-        LOG_INFO("loading model from '%s' completed, taking %.2fs", SAFE_STR(sd_ctx_params->model_path), (t1 - t0) * 1.0f / 1000);
-
         // check is_using_v_parameterization_for_sd2
 
         if (sd_version_is_sd2(version)) {
             if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
                 is_using_v_parameterization = true;
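A note on the measurement itself: sharing one prev_time_ms/curr_time_ms pair costs only a single ggml_time_ms() call per phase boundary, but the clock has millisecond granularity, so phases on small tensors can round to zero and the per-phase figures may sum to slightly less than the end-to-end total.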