#include "model_manager.h"

// NOTE(review): the standard headers below were reconstructed from the std names used in
// this file; confirm them against the project's original include list.
#include <algorithm>
#include <cstdint>
#include <memory>
#include <mutex>
#include <unordered_set>
#include <utility>

#include "core/ggml_extend_backend.h"
#include "core/util.h"
#include "model/adapter/lora.hpp"

// Returns the smallest value >= `offset` such that `buffer + offset` is a multiple of
// `alignment`. `alignment` must be a non-zero power of two (asserted).
static size_t aligned_offset(const void* buffer, size_t offset, size_t alignment) {
    GGML_ASSERT(alignment != 0 && (alignment & (alignment - 1)) == 0);
    size_t align = (alignment - ((reinterpret_cast<uintptr_t>(buffer) + offset) % alignment)) % alignment;
    return offset + align;
}

// Field-by-field, order-sensitive comparison of two LoRA spec lists.
static bool lora_specs_equal(const std::vector<ModelManager::LoraSpec>& lhs,
                             const std::vector<ModelManager::LoraSpec>& rhs) {
    if (lhs.size() != rhs.size()) {
        return false;
    }
    for (size_t i = 0; i < lhs.size(); ++i) {
        if (lhs[i].path != rhs[i].path ||
            lhs[i].multiplier != rhs[i].multiplier ||
            lhs[i].is_high_noise != rhs[i].is_high_noise ||
            lhs[i].tensor_name_prefix_filter != rhs[i].tensor_name_prefix_filter ||
            lhs[i].required != rhs[i].required) {
            return false;
        }
    }
    return true;
}

// Builds the identifier handed to LoraModel: the file path, prefixed with "|high_noise|"
// for high-noise LoRAs.
static std::string lora_id(const ModelManager::LoraSpec& lora) {
    return lora.is_high_noise ? "|high_noise|" + lora.path : lora.path;
}

// Returns true when `backend` is the CPU backend or its device reports the
// `buffer_from_host_ptr` capability. Null backends/devices yield false.
static bool backend_supports_host_buffer(ggml_backend_t backend) {
    if (backend == nullptr) {
        return false;
    }
    if (sd_backend_is_cpu(backend)) {
        return true;
    }
    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
    if (dev == nullptr) {
        return false;
    }
    ggml_backend_dev_props props;
    ggml_backend_dev_get_props(dev, &props);
    return props.caps.buffer_from_host_ptr;
}

// Releases every staging and params storage block still held by the manager.
ModelManager::~ModelManager() {
    release_all();
}

// Replaces the set of tensor-name prefixes that should_ignore() matches against.
void ModelManager::set_common_ignore_tensors(std::set<std::string> ignore_tensors) {
    common_ignore_tensors_ = std::move(ignore_tensors);
}

// Installs a new LoRA list. When the list or version actually changes, the LoRA epoch is
// bumped and all prepared params are dropped so they are reloaded and re-patched on the
// next prepare_params() call.
void ModelManager::set_loras(std::vector<LoraSpec> loras, SDVersion version) {
    if (loras.empty() && loras_.empty()) {
        // Nothing was applied and nothing will be: only remember the version.
        lora_version_ = version;
        return;
    }
    if (lora_version_ == version && lora_specs_equal(loras_, loras)) {
        return;
    }
    loras_        = std::move(loras);
    lora_version_ = version;
    current_lora_epoch_++;
    reset_lora_applied_params();
}

// Returns the names of all registered tensors.
std::set<std::string> ModelManager::tensor_names() const {
    std::set<std::string> names;
    for (const auto& state : tensor_states_) {
        if (state != nullptr) {
            names.insert(state->name);
        }
    }
    return names;
}

// Sums ggml_nbytes() over the distinct non-null tensors in `tensors`; a tensor that is
// mapped under several names is counted once.
size_t estimate_tensors_size(const std::map<std::string, ggml_tensor*>& tensors) {
    size_t size = 0;
    std::unordered_set<ggml_tensor*> seen;
    for (const auto& pair : tensors) {
        ggml_tensor* tensor = pair.second;
        if (tensor == nullptr || seen.find(tensor) != seen.end()) {
            continue;
        }
        seen.insert(tensor);
        size += ggml_nbytes(tensor);
    }
    return size;
}

// Registers a batch of named tensors under `desc`.
//
// Null tensors are skipped. The whole batch is rejected (returns false) when `desc` is
// empty or any name is already registered; in that case no tensor is renamed, no state is
// added and `*registered_tensor_size` is left untouched. On success every tensor gets its
// ggml name set to the map key and, if `registered_tensor_size` is non-null, the estimated
// byte size of the batch is added to it.
bool ModelManager::register_param_tensors(const std::string& desc,
                                          std::map<std::string, ggml_tensor*> tensors,
                                          ResidencyMode residency_mode,
                                          ggml_backend_t compute_backend,
                                          ggml_backend_t params_backend,
                                          size_t* registered_tensor_size) {
    if (desc.empty()) {
        LOG_ERROR("model manager tensor desc is empty");
        return false;
    }

    // Validate every name before mutating anything so a failed call has no side effects.
    for (const auto& pair : tensors) {
        if (pair.second == nullptr) {
            continue;
        }
        if (tensor_states_by_name_.find(pair.first) != tensor_states_by_name_.end()) {
            LOG_ERROR("model manager tensor name '%s' is already registered", pair.first.c_str());
            return false;
        }
    }

    std::vector<std::unique_ptr<TensorState>> new_states;
    new_states.reserve(tensors.size());
    for (const auto& pair : tensors) {
        const std::string& name = pair.first;
        ggml_tensor* tensor     = pair.second;
        if (tensor == nullptr) {
            continue;
        }
        // resolve_required_tensor_states() looks tensors up by their ggml name.
        ggml_set_name(tensor, name.c_str());

        auto state             = std::make_unique<TensorState>();
        state->name            = name;
        state->tensor          = tensor;
        state->desc            = desc;
        state->residency_mode  = residency_mode;
        state->compute_backend = compute_backend;
        state->params_backend  = params_backend;
        new_states.push_back(std::move(state));
    }

    for (auto& state : new_states) {
        TensorState* registered_state                   = state.get();
        tensor_states_by_name_[registered_state->name] = registered_state;
        tensor_states_.push_back(std::move(state));
    }

    if (registered_tensor_size != nullptr) {
        *registered_tensor_size += estimate_tensors_size(tensors);
    }
    return true;
}

// Validates every registered tensor against the model metadata and marks the ones that
// pass. Returns false if any state is null or fails validation; all states are still
// visited.
bool ModelManager::validate_registered_tensors() {
    bool ok = true;
    for (const auto& state : tensor_states_) {
        if (state == nullptr) {
            ok = false;
            continue;
        }
        bool state_ok = validate_tensor(*state);
        if (state_ok) {
            state->metadata_validated = true;
        }
        ok = state_ok && ok;
    }
    return ok;
}

// Makes sure every state in `states` has its data resident in the params backend.
//
// Ignored and optional-missing tensors are skipped. Tensors not yet loaded are first
// offered to mmap_params(); whatever still has no data is placed in freshly allocated
// backend buffers, then everything is read through the model loader. On any failure the
// storage blocks created by this call are freed and false is returned.
bool ModelManager::load_tensors_to_params_backend(const std::vector<TensorState*>& states) {
    std::vector<TensorState*> need_load;
    need_load.reserve(states.size());
    for (TensorState* state : states) {
        if (state == nullptr || should_ignore(*state) || is_optional_missing_tensor(state->name)) {
            continue;
        }
        if (!state->metadata_validated) {
            if (!validate_tensor(*state)) {
                return false;
            }
            state->metadata_validated = true;
        }
        if (!state->loaded_to_params_backend) {
            need_load.push_back(state);
        }
    }
    if (need_load.empty()) {
        return true;
    }

    std::vector<ParamsStorageBlock*> created_storage_blocks;

    // Undo helper: frees and unregisters every storage block created by this call.
    auto rollback_created_blocks = [&]() {
        for (ParamsStorageBlock* block : created_storage_blocks) {
            if (block != nullptr) {
                free_params_storage_block(*block);
                erase_params_storage_block(block);
            }
        }
    };

    if (!mmap_params(need_load, created_storage_blocks)) {
        rollback_created_blocks();
        return false;
    }

    // Only tensors that still have no data (and are not views) need a backend buffer.
    std::vector<TensorState*> need_alloc;
    need_alloc.reserve(need_load.size());
    for (TensorState* state : need_load) {
        if (state->tensor != nullptr && state->tensor->data == nullptr && state->tensor->view_src == nullptr) {
            need_alloc.push_back(state);
        }
    }

    if (!alloc_params_buffers(need_alloc, created_storage_blocks) || !load_tensors(need_load)) {
        rollback_created_blocks();
        return false;
    }

    for (ParamsStorageBlock* block : created_storage_blocks) {
        if (block != nullptr && block->buffer != nullptr) {
            LOG_DEBUG("model manager prepared params backend buffer (%6.2f MB, %zu tensors, %s)",
                      ggml_backend_buffer_get_size(block->buffer) / (1024.f * 1024.f),
                      block->states.size(),
                      ggml_backend_buffer_is_host(block->buffer) ? "RAM" : "VRAM");
        }
    }
    return true;
}

// Copies params that live on a different backend than the one computing with them into a
// buffer on the compute backend.
//
// For each compute backend one staging context/buffer is created. After the copy the
// managed tensor and its staging duplicate swap `buffer`/`data`/`extra`, so the managed
// tensor points at the compute-backend copy while the staging tensor remembers the
// params-backend storage (restored by free_compute_staging_block()).
bool ModelManager::stage_tensors_to_compute_backend(const std::vector<TensorState*>& states) {
    std::map<ggml_backend_t, std::vector<TensorState*>> states_by_compute_backend;
    for (TensorState* state : states) {
        if (state == nullptr || should_ignore(*state) || is_optional_missing_tensor(state->name)) {
            continue;
        }
        if (state->compute_backend == nullptr) {
            LOG_ERROR("model manager compute backend is null for tensor '%s'", state->name.c_str());
            return false;
        }
        if (state->params_backend == nullptr) {
            LOG_ERROR("model manager params backend is null for tensor '%s'", state->name.c_str());
            return false;
        }
        // Same backend or already staged: nothing to copy.
        if (state->compute_backend == state->params_backend || state->staged_to_compute_backend) {
            continue;
        }
        if (!state->loaded_to_params_backend || state->tensor == nullptr || state->tensor->data == nullptr) {
            LOG_ERROR("model manager tensor '%s' is not loaded to params backend", state->name.c_str());
            return false;
        }
        states_by_compute_backend[state->compute_backend].push_back(state);
    }

    for (const auto& pair : states_by_compute_backend) {
        ggml_backend_t compute_backend                  = pair.first;
        const std::vector<TensorState*>& backend_states = pair.second;
        if (backend_states.empty()) {
            continue;
        }

        int64_t t0 = ggml_time_ms();

        // Metadata-only context: one tensor header per staged tensor, no data allocation.
        ggml_init_params init_params{};
        init_params.mem_size   = std::max<size_t>(1, backend_states.size()) * ggml_tensor_overhead();
        init_params.mem_buffer = nullptr;
        init_params.no_alloc   = true;

        ggml_context* staging_ctx = ggml_init(init_params);
        GGML_ASSERT(staging_ctx != nullptr);

        std::vector<std::pair<TensorState*, ggml_tensor*>> staged_tensors;
        staged_tensors.reserve(backend_states.size());
        for (TensorState* state : backend_states) {
            ggml_tensor* staging_tensor = ggml_dup_tensor(staging_ctx, state->tensor);
            ggml_set_name(staging_tensor, state->tensor->name);
            staged_tensors.push_back({state, staging_tensor});
        }

        ggml_backend_buffer_t compute_buffer = ggml_backend_alloc_ctx_tensors(staging_ctx, compute_backend);
        if (compute_buffer == nullptr) {
            LOG_ERROR("model manager alloc compute params backend buffer failed, num_tensors = %zu",
                      staged_tensors.size());
            ggml_free(staging_ctx);
            return false;
        }
        ggml_backend_buffer_set_usage(compute_buffer, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);

        for (auto& staged_tensor : staged_tensors) {
            TensorState* state          = staged_tensor.first;
            ggml_tensor* managed_tensor = state->tensor;
            ggml_tensor* staging_tensor = staged_tensor.second;

            ggml_backend_tensor_copy(managed_tensor, staging_tensor);
            // Swap storage so the managed tensor now refers to the compute-backend copy.
            std::swap(managed_tensor->buffer, staging_tensor->buffer);
            std::swap(managed_tensor->data, staging_tensor->data);
            std::swap(managed_tensor->extra, staging_tensor->extra);
        }
        ggml_backend_synchronize(compute_backend);

        auto block             = std::make_unique<ComputeStagingBlock>();
        block->compute_backend = compute_backend;
        block->buffer          = compute_buffer;
        block->staging_ctx     = staging_ctx;
        block->staged_tensors  = std::move(staged_tensors);
        for (auto& staged_tensor : block->staged_tensors) {
            TensorState* state               = staged_tensor.first;
            state->staged_to_compute_backend = true;
        }
        compute_staging_blocks_.push_back(std::move(block));

        int64_t t1 = ggml_time_ms();
        LOG_DEBUG("model manager staged compute params (%6.2f MB, %zu tensors) to %s, taking %.2fs",
                  ggml_backend_buffer_get_size(compute_buffer) / (1024.f * 1024.f),
                  backend_states.size(),
                  ggml_backend_name(compute_backend),
                  (t1 - t0) * 1.0f / 1000);
    }
    return true;
}

// Applies the configured LoRAs to every state in `states` that has not yet been patched
// in the current LoRA epoch.
//
// States are grouped by compute backend; for each group every LoRA file is loaded,
// applied and released in list order. A LoRA that fails to load or contains no tensors is
// skipped unless it is marked `required`, in which case false is returned.
//
// NOTE(review): when a required LoRA fails after earlier LoRAs of the same group were
// applied, the group's states are not marked as patched, so a later retry would apply the
// earlier LoRAs again -- confirm callers discard the params after a failure.
bool ModelManager::apply_loras_to_params(const std::vector<TensorState*>& states) {
    if (loras_.empty()) {
        return true;
    }

    struct LoraApplyGroup {
        std::map<std::string, ggml_tensor*> model_tensors;
        std::vector<TensorState*> states;
    };

    std::map<ggml_backend_t, LoraApplyGroup> groups;
    for (TensorState* state : states) {
        if (state == nullptr || state->tensor == nullptr || should_ignore(*state) ||
            is_optional_missing_tensor(state->name)) {
            continue;
        }
        if (state->applied_lora_epoch == current_lora_epoch_) {
            continue;
        }
        if (state->compute_backend == nullptr) {
            LOG_ERROR("model manager compute backend is null for lora target tensor '%s'", state->name.c_str());
            return false;
        }
        if (state->tensor->data == nullptr) {
            LOG_ERROR("model manager lora target tensor '%s' is not prepared", state->name.c_str());
            return false;
        }
        LoraApplyGroup& group            = groups[state->compute_backend];
        group.model_tensors[state->name] = state->tensor;
        group.states.push_back(state);
    }
    if (groups.empty()) {
        return true;
    }

    std::set<std::string> all_tensor_names = tensor_names();
    for (auto& group_pair : groups) {
        ggml_backend_t compute_backend = group_pair.first;
        LoraApplyGroup& group          = group_pair.second;

        for (const LoraSpec& lora_spec : loras_) {
            if (group.model_tensors.empty()) {
                continue;
            }

            std::string id = lora_id(lora_spec);
            auto lora      = std::make_shared<LoraModel>(id,
                                                         compute_backend,
                                                         compute_backend,
                                                         lora_spec.path,
                                                         lora_spec.is_high_noise ? "model.high_noise_" : "",
                                                         lora_version_);

            LoraModel::filter_t lora_tensor_filter = nullptr;
            if (!lora_spec.tensor_name_prefix_filter.empty()) {
                // The filter is only invoked by load_from_file() below, while `lora_spec`
                // is still alive.
                lora_tensor_filter = [&lora_spec](const std::string& tensor_name) {
                    return starts_with(tensor_name, lora_spec.tensor_name_prefix_filter);
                };
            }

            if (!lora->load_from_file(n_threads_, lora_tensor_filter)) {
                LOG_WARN("load lora tensors from %s failed", lora_spec.path.c_str());
                if (lora_spec.required) {
                    return false;
                }
                continue;
            }
            if (lora->lora_tensors.empty()) {
                if (lora_spec.required) {
                    LOG_ERROR("required lora has no tensors: %s", lora_spec.path.c_str());
                    return false;
                }
                continue;
            }

            lora->multiplier = lora_spec.multiplier;
            lora->apply(group.model_tensors, all_tensor_names, lora_version_, n_threads_, false);
            lora->release_loaded_tensors();
        }

        for (TensorState* state : group.states) {
            if (state != nullptr) {
                state->applied_lora_epoch = current_lora_epoch_;
            }
        }
    }
    return true;
}

// Drops every staged and stored param and marks all tensors as un-patched, so the next
// prepare_params() reloads them from the model file before applying the new LoRA list.
void ModelManager::reset_lora_applied_params() {
    release_compute_staging_blocks(true);
    release_params_storage_blocks(true);
    for (auto& state : tensor_states_) {
        if (state != nullptr) {
            state->applied_lora_epoch = UINT64_MAX;
        }
    }
}

// Returns true when the tensor name starts with any configured ignore prefix.
bool ModelManager::should_ignore(const TensorState& state) const {
    for (const auto& ignore_prefix : common_ignore_tensors_) {
        if (starts_with(state.name, ignore_prefix)) {
            return true;
        }
    }
    return false;
}

// Returns true for tensor names that are skipped by validation, loading, staging and
// LoRA application.
bool ModelManager::is_optional_missing_tensor(const std::string& name) const {
    return name.find("cond_stage_model.transformer.text_model.encoder.layers.23") != std::string::npos ||
           name.find("alphas_cumprod") != std::string::npos;
}

// Checks that the tensor exists in the model loader's metadata and that all four
// dimensions match. Null, ignored and optional-missing tensors are treated as valid.
bool ModelManager::validate_tensor(const TensorState& state) const {
    if (state.tensor == nullptr || should_ignore(state) || is_optional_missing_tensor(state.name)) {
        return true;
    }

    const auto& tensor_storage_map = model_loader_.get_tensor_storage_map();
    auto ts_it                     = tensor_storage_map.find(state.name);
    if (ts_it == tensor_storage_map.end()) {
        LOG_ERROR("%s tensor '%s' not in model metadata", state.desc.c_str(), state.name.c_str());
        return false;
    }

    const TensorStorage& tensor_storage = ts_it->second;
    if (state.tensor->ne[0] != tensor_storage.ne[0] ||
        state.tensor->ne[1] != tensor_storage.ne[1] ||
        state.tensor->ne[2] != tensor_storage.ne[2] ||
        state.tensor->ne[3] != tensor_storage.ne[3]) {
        LOG_ERROR(
            "%s tensor '%s' has wrong shape in model metadata: got [%d, %d, %d, %d], expected [%d, %d, %d, %d]",
            state.desc.c_str(),
            state.name.c_str(),
            (int)tensor_storage.ne[0], (int)tensor_storage.ne[1], (int)tensor_storage.ne[2], (int)tensor_storage.ne[3],
            (int)state.tensor->ne[0], (int)state.tensor->ne[1], (int)state.tensor->ne[2], (int)state.tensor->ne[3]);
        return false;
    }
    return true;
}

// Asks the model loader to memory-map every eligible tensor in `states` that has no data
// yet. Tensors that ended up with data are grouped into one new storage block, which is
// appended to `created_storage_blocks`. The visible code always returns true; an empty
// mmap result simply leaves the tensors for regular allocation.
bool ModelManager::mmap_params(const std::vector<TensorState*>& states,
                               std::vector<ParamsStorageBlock*>& created_storage_blocks) {
    std::map<std::string, ggml_tensor*> mmap_candidates;
    std::map<std::string, TensorState*> mmap_states;
    for (TensorState* state : states) {
        if (state == nullptr || !can_mmap_storage(*state) || state->tensor == nullptr ||
            state->tensor->data != nullptr || state->tensor->view_src != nullptr) {
            continue;
        }
        mmap_candidates[state->name] = state->tensor;
        mmap_states[state->name]     = state;
    }
    if (mmap_candidates.empty()) {
        return true;
    }

    auto mmap_store = model_loader_.mmap_tensors(mmap_candidates, {}, true);
    if (mmap_store.empty()) {
        return true;
    }

    auto block                = std::make_unique<ParamsStorageBlock>();
    block->mmap_tensor_stores = std::move(mmap_store);
    ParamsStorageBlock* raw   = block.get();
    for (const auto& pair : mmap_states) {
        TensorState* state = pair.second;
        // Only tensors that actually received a data pointer belong to this block.
        if (state != nullptr && state->tensor != nullptr && state->tensor->data != nullptr) {
            block->states.push_back(state);
        }
    }
    if (!block->states.empty()) {
        params_storage_blocks_.push_back(std::move(block));
        created_storage_blocks.push_back(raw);
    }
    return true;
}

// A tensor may be mmap-backed when mmap is enabled, it uses ParamBackend residency, both
// backends are set, and either backend is the CPU backend or the compute backend can use
// host pointers.
bool ModelManager::can_mmap_storage(const TensorState& state) const {
    if (!enable_mmap_ || state.residency_mode != ResidencyMode::ParamBackend) {
        return false;
    }
    if (state.compute_backend == nullptr || state.params_backend == nullptr) {
        return false;
    }
    return sd_backend_is_cpu(state.compute_backend) ||
           sd_backend_is_cpu(state.params_backend) ||
           backend_supports_host_buffer(state.compute_backend);
}

// Allocates params-backend buffers for `states` and binds each tensor to its slice.
//
// Tensors are grouped by (buffer type, residency mode) and packed into chunks that respect
// the buffer type's reported max size. Every allocated buffer becomes a storage block that
// is appended to `created_storage_blocks`. Returns false on the first failure; blocks
// already created stay registered for the caller to roll back.
bool ModelManager::alloc_params_buffers(const std::vector<TensorState*>& states,
                                        std::vector<ParamsStorageBlock*>& created_storage_blocks) {
    std::map<std::pair<ggml_backend_buffer_type_t, int>, std::vector<TensorState*>> states_by_buffer_type;
    for (TensorState* state : states) {
        if (state == nullptr || state->tensor == nullptr) {
            continue;
        }
        ggml_backend_buffer_type_t params_buft = params_buffer_type_for(*state);
        if (params_buft == nullptr) {
            return false;
        }
        states_by_buffer_type[{params_buft, static_cast<int>(state->residency_mode)}].push_back(state);
    }

    for (const auto& pair : states_by_buffer_type) {
        ggml_backend_buffer_type_t params_buft        = pair.first.first;
        const std::vector<TensorState*>& group_states = pair.second;
        size_t alignment                              = ggml_backend_buft_get_alignment(params_buft);
        size_t max_size                               = ggml_backend_buft_get_max_size(params_buft);

        // Allocates one buffer of `chunk_size` bytes and lays the chunk's tensors out in it.
        auto alloc_chunk = [&](const std::vector<TensorState*>& chunk, size_t chunk_size) -> bool {
            if (chunk.empty() || chunk_size == 0) {
                return true;
            }

            ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(params_buft, chunk_size);
            if (buffer == nullptr) {
                LOG_ERROR("model manager alloc params backend buffer failed, size = %.2fMB",
                          chunk_size / (1024.0 * 1024.0));
                return false;
            }
            ggml_backend_buffer_set_usage(buffer, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);

            std::vector<ggml_tensor*> initialized_tensors;
            void* base    = ggml_backend_buffer_get_base(buffer);
            size_t offset = aligned_offset(base, 0, ggml_backend_buffer_get_alignment(buffer));
            for (TensorState* state : chunk) {
                ggml_tensor* tensor = state->tensor;
                size_t tensor_size  = GGML_PAD(ggml_backend_buffer_get_alloc_size(buffer, tensor),
                                               ggml_backend_buffer_get_alignment(buffer));
                // NOTE(review): the cast target type (char*) was reconstructed; confirm it
                // against the original source.
                enum ggml_status status =
                    ggml_backend_tensor_alloc(buffer, tensor, static_cast<char*>(base) + offset);
                if (status != GGML_STATUS_SUCCESS) {
                    LOG_ERROR("model manager failed to initialize params tensor '%s'", ggml_get_name(tensor));
                    for (ggml_tensor* initialized : initialized_tensors) {
                        initialized->buffer = nullptr;
                        initialized->data   = nullptr;
                        initialized->extra  = nullptr;
                    }
                    // The failing tensor may already have been pointed at this buffer before
                    // the backend's init step reported the error; detach it as well so it
                    // does not keep dangling pointers once the buffer is freed.
                    tensor->buffer = nullptr;
                    tensor->data   = nullptr;
                    tensor->extra  = nullptr;
                    LOG_DEBUG("model manager releasing params backend buffer (%6.2f MB, %zu tensors, %s)",
                              ggml_backend_buffer_get_size(buffer) / (1024.f * 1024.f),
                              initialized_tensors.size(),
                              ggml_backend_buffer_is_host(buffer) ? "RAM" : "VRAM");
                    ggml_backend_buffer_free(buffer);
                    return false;
                }
                initialized_tensors.push_back(tensor);
                offset += tensor_size;
            }

            auto block              = std::make_unique<ParamsStorageBlock>();
            block->buffer           = buffer;
            block->states           = chunk;
            ParamsStorageBlock* raw = block.get();
            params_storage_blocks_.push_back(std::move(block));
            created_storage_blocks.push_back(raw);
            return true;
        };

        std::vector<TensorState*> chunk;
        size_t chunk_size = 0;
        for (TensorState* state : group_states) {
            ggml_tensor* tensor = state->tensor;
            size_t tensor_size  = GGML_PAD(ggml_backend_buft_get_alloc_size(params_buft, tensor), alignment);
            // Some backends, e.g. Vulkan, report a preferred chunk size here rather than a
            // hard per-tensor allocation limit. Oversized tensors are allocated alone.
            if (!chunk.empty() && max_size > 0 && chunk_size + tensor_size > max_size) {
                if (!alloc_chunk(chunk, chunk_size)) {
                    return false;
                }
                chunk.clear();
                chunk_size = 0;
            }
            chunk.push_back(state);
            chunk_size += tensor_size;
        }
        if (!alloc_chunk(chunk, chunk_size)) {
            return false;
        }
    }
    return true;
}

// Reads the data of every state in `states` from the model file into its tensor.
//
// The loader callback re-checks the shape against the file's tensor storage and records
// which names were delivered. Returns false if the loader fails or any requested tensor
// was never delivered; only on full success are the states marked as loaded.
bool ModelManager::load_tensors(const std::vector<TensorState*>& states) {
    std::map<std::string, TensorState*> states_by_name;
    std::set<std::string> target_tensor_names;
    for (TensorState* state : states) {
        if (state == nullptr) {
            continue;
        }
        states_by_name[state->name] = state;
        target_tensor_names.insert(state->name);
    }
    if (states_by_name.empty()) {
        return true;
    }

    std::set<std::string> loaded_names;
    // Guards `loaded_names`, which the callback below updates.
    std::mutex loaded_names_mutex;

    auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
        const std::string& name = tensor_storage.name;
        *dst_tensor             = nullptr;

        auto state_it = states_by_name.find(name);
        if (state_it == states_by_name.end()) {
            // Not one of ours: leave dst null and carry on.
            return true;
        }

        TensorState* state = state_it->second;
        if (state == nullptr || state->tensor == nullptr) {
            LOG_ERROR("model manager tensor '%s' is null", name.c_str());
            return false;
        }
        if (state->tensor->ne[0] != tensor_storage.ne[0] ||
            state->tensor->ne[1] != tensor_storage.ne[1] ||
            state->tensor->ne[2] != tensor_storage.ne[2] ||
            state->tensor->ne[3] != tensor_storage.ne[3]) {
            LOG_ERROR(
                "model manager tensor '%s' has wrong shape in model file: got [%d, %d, %d, %d], expected [%d, %d, %d, %d]",
                name.c_str(),
                (int)tensor_storage.ne[0], (int)tensor_storage.ne[1], (int)tensor_storage.ne[2], (int)tensor_storage.ne[3],
                (int)state->tensor->ne[0], (int)state->tensor->ne[1], (int)state->tensor->ne[2], (int)state->tensor->ne[3]);
            return false;
        }

        {
            std::lock_guard<std::mutex> lock(loaded_names_mutex);
            loaded_names.insert(name);
        }
        *dst_tensor = state->tensor;
        return true;
    };

    if (!model_loader_.load_tensors(on_new_tensor_cb, enable_mmap_, &target_tensor_names)) {
        LOG_ERROR("model manager load tensors failed");
        return false;
    }

    bool missing = false;
    for (const auto& pair : states_by_name) {
        const std::string& name = pair.first;
        if (loaded_names.find(name) == loaded_names.end()) {
            LOG_ERROR("model manager tensor '%s' was not loaded", name.c_str());
            missing = true;
        }
    }
    if (missing) {
        return false;
    }

    for (const auto& pair : states_by_name) {
        pair.second->loaded_to_params_backend = true;
    }
    return true;
}

// Picks the buffer type used to store a tensor on its params backend: the compute
// device's host buffer type when the two backends differ and such a type exists,
// otherwise the params backend's default buffer type. Returns nullptr (after logging)
// when the params backend is null.
ggml_backend_buffer_type_t ModelManager::params_buffer_type_for(const TensorState& state) const {
    if (state.params_backend == nullptr) {
        LOG_ERROR("model manager params backend is null for tensor '%s'", state.name.c_str());
        return nullptr;
    }

    ggml_backend_buffer_type_t params_buft = nullptr;
    if (state.compute_backend != nullptr && state.params_backend != state.compute_backend) {
        ggml_backend_dev_t compute_dev = ggml_backend_get_device(state.compute_backend);
        if (compute_dev != nullptr) {
            params_buft = ggml_backend_dev_host_buffer_type(compute_dev);
        }
    }
    if (params_buft == nullptr) {
        params_buft = ggml_backend_get_default_buffer_type(state.params_backend);
    }
    return params_buft;
}

// Undoes stage_tensors_to_compute_backend() for one block: each managed tensor gets back
// the storage pointers saved in its staging duplicate, its staged flag and LoRA epoch are
// reset, and the compute buffer and staging context are freed.
void ModelManager::free_compute_staging_block(ComputeStagingBlock& block) {
    for (auto& staged_tensor : block.staged_tensors) {
        TensorState* state          = staged_tensor.first;
        ggml_tensor* staging_tensor = staged_tensor.second;
        if (state == nullptr || state->tensor == nullptr || staging_tensor == nullptr) {
            continue;
        }

        ggml_tensor* managed_tensor = state->tensor;
        managed_tensor->buffer      = staging_tensor->buffer;
        managed_tensor->data        = staging_tensor->data;
        managed_tensor->extra       = staging_tensor->extra;
        staging_tensor->buffer      = nullptr;
        staging_tensor->data        = nullptr;
        staging_tensor->extra       = nullptr;

        state->staged_to_compute_backend = false;
        state->applied_lora_epoch        = UINT64_MAX;
    }

    if (block.buffer != nullptr) {
        LOG_DEBUG("model manager releasing compute params (%6.2f MB, %zu tensors) from %s",
                  ggml_backend_buffer_get_size(block.buffer) / (1024.f * 1024.f),
                  block.staged_tensors.size(),
                  block.compute_backend != nullptr ? ggml_backend_name(block.compute_backend) : "unknown");
        ggml_backend_buffer_free(block.buffer);
        block.buffer = nullptr;
    }
    if (block.staging_ctx != nullptr) {
        ggml_free(block.staging_ctx);
        block.staging_ctx = nullptr;
    }
    block.staged_tensors.clear();
}

// Frees compute staging blocks. With `force` every block is freed. Otherwise a block is
// freed only if each of its (non-null) states has no active prepare and, when
// `target_states` is given, is a member of it.
void ModelManager::release_compute_staging_blocks(bool force,
                                                  const std::unordered_set<TensorState*>* target_states) {
    for (auto it = compute_staging_blocks_.begin(); it != compute_staging_blocks_.end();) {
        ComputeStagingBlock* block = it->get();
        bool can_release           = force;
        if (!can_release) {
            can_release = std::all_of(block->staged_tensors.begin(),
                                      block->staged_tensors.end(),
                                      [target_states](const std::pair<TensorState*, ggml_tensor*>& pair) {
                                          TensorState* state = pair.first;
                                          if (state == nullptr) {
                                              return true;
                                          }
                                          if (target_states != nullptr &&
                                              target_states->find(state) == target_states->end()) {
                                              return false;
                                          }
                                          return state->active_prepare_count == 0;
                                      });
        }
        if (can_release) {
            free_compute_staging_block(*block);
            it = compute_staging_blocks_.erase(it);
        } else {
            ++it;
        }
    }
}

// Frees one params storage block: releases its backend buffer and mmap stores, detaches
// every tensor from the storage and marks its state as not loaded / not LoRA-patched.
void ModelManager::free_params_storage_block(ParamsStorageBlock& block) {
    if (block.buffer != nullptr) {
        LOG_DEBUG("model manager releasing params backend buffer (%6.2f MB, %zu tensors, %s)",
                  ggml_backend_buffer_get_size(block.buffer) / (1024.f * 1024.f),
                  block.states.size(),
                  ggml_backend_buffer_is_host(block.buffer) ? "RAM" : "VRAM");
        ggml_backend_buffer_free(block.buffer);
        block.buffer = nullptr;
    }
    block.mmap_tensor_stores.clear();

    for (TensorState* state : block.states) {
        if (state == nullptr || state->tensor == nullptr) {
            continue;
        }
        state->tensor->buffer           = nullptr;
        state->tensor->data             = nullptr;
        state->tensor->extra            = nullptr;
        state->loaded_to_params_backend = false;
        state->applied_lora_epoch       = UINT64_MAX;
    }
    block.states.clear();
}

// Frees params storage blocks. With `force` every block is freed. Otherwise a block is
// freed only if each of its (non-null) states is idle, not staged, uses Disk residency
// and, when `target_states` is given, is a member of it.
void ModelManager::release_params_storage_blocks(bool force,
                                                 const std::unordered_set<TensorState*>* target_states) {
    for (auto it = params_storage_blocks_.begin(); it != params_storage_blocks_.end();) {
        ParamsStorageBlock* block = it->get();
        bool can_release          = force;
        if (!can_release) {
            can_release = std::all_of(block->states.begin(),
                                      block->states.end(),
                                      [target_states](TensorState* state) {
                                          if (state == nullptr) {
                                              return true;
                                          }
                                          if (target_states != nullptr &&
                                              target_states->find(state) == target_states->end()) {
                                              return false;
                                          }
                                          return state->active_prepare_count == 0 &&
                                                 !state->staged_to_compute_backend &&
                                                 state->residency_mode == ResidencyMode::Disk;
                                      });
        }
        if (can_release) {
            free_params_storage_block(*block);
            it = params_storage_blocks_.erase(it);
        } else {
            ++it;
        }
    }
}

// Removes `block` from the list of owned storage blocks (destroying it). The caller is
// expected to have freed its resources with free_params_storage_block() first.
void ModelManager::erase_params_storage_block(ParamsStorageBlock* block) {
    auto it = std::find_if(params_storage_blocks_.begin(),
                           params_storage_blocks_.end(),
                           [block](const std::unique_ptr<ParamsStorageBlock>& item) {
                               return item.get() == block;
                           });
    if (it != params_storage_blocks_.end()) {
        params_storage_blocks_.erase(it);
    }
}

// Resets the usage counters of every tensor and force-frees all staging and storage
// blocks.
void ModelManager::release_all() {
    for (auto& state : tensor_states_) {
        if (state == nullptr) {
            continue;
        }
        state->active_prepare_count = 0;
        state->applied_lora_epoch   = UINT64_MAX;
    }
    release_compute_staging_blocks(true);
    release_params_storage_blocks(true);
}

// Maps ggml tensors to their registered states via the tensor's ggml name. Null tensors
// are skipped and duplicates collapse to one entry (first-seen order). Returns false when
// a tensor is unnamed or not registered.
bool ModelManager::resolve_required_tensor_states(const std::vector<ggml_tensor*>& tensors,
                                                  std::vector<TensorState*>& required_states) const {
    required_states.clear();
    std::unordered_set<TensorState*> seen;
    for (ggml_tensor* tensor : tensors) {
        if (tensor == nullptr) {
            continue;
        }

        const char* raw_name = ggml_get_name(tensor);
        if (raw_name == nullptr || raw_name[0] == '\0') {
            LOG_ERROR("model manager unnamed tensor is not registered");
            return false;
        }

        auto state_it = tensor_states_by_name_.find(raw_name);
        if (state_it == tensor_states_by_name_.end()) {
            LOG_ERROR("model manager tensor '%s' is not registered", raw_name);
            return false;
        }

        TensorState* state = state_it->second;
        if (state == nullptr) {
            LOG_ERROR("model manager tensor '%s' has no tensor state", raw_name);
            return false;
        }
        if (seen.insert(state).second) {
            required_states.push_back(state);
        }
    }
    return true;
}

// Makes `tensors` ready for compute: load to the params backend, stage to the compute
// backend where needed, apply LoRAs, then bump each state's active prepare count.
// If staging or LoRA application fails, idle blocks are released and false is returned.
bool ModelManager::prepare_params(const std::vector<ggml_tensor*>& tensors) {
    if (tensors.empty()) {
        return true;
    }

    std::vector<TensorState*> required_states;
    if (!resolve_required_tensor_states(tensors, required_states)) {
        return false;
    }
    if (!load_tensors_to_params_backend(required_states)) {
        return false;
    }
    if (!stage_tensors_to_compute_backend(required_states)) {
        release_compute_staging_blocks(false);
        release_params_storage_blocks(false);
        return false;
    }
    if (!apply_loras_to_params(required_states)) {
        release_compute_staging_blocks(false);
        release_params_storage_blocks(false);
        return false;
    }

    for (TensorState* state : required_states) {
        if (state == nullptr) {
            continue;
        }
        state->active_prepare_count++;
    }
    return true;
}

// Drops one active prepare from each distinct state in `states`, then frees any compute
// staging block whose tensors are all in that set and idle.
void ModelManager::finish_compute_backend_usage(const std::vector<TensorState*>& states) {
    if (states.empty()) {
        return;
    }

    std::unordered_set<TensorState*> target_states;
    for (TensorState* state : states) {
        if (state == nullptr || !target_states.insert(state).second) {
            continue;
        }
        if (state->active_prepare_count > 0) {
            state->active_prepare_count--;
        }
    }
    release_compute_staging_blocks(false, &target_states);
}

// Tensor-based entry point for finish_compute_backend_usage(); does nothing if any tensor
// cannot be resolved to a registered state.
void ModelManager::release_compute_backend_params(const std::vector<ggml_tensor*>& tensors) {
    if (tensors.empty()) {
        return;
    }

    std::vector<TensorState*> required_states;
    if (!resolve_required_tensor_states(tensors, required_states)) {
        return;
    }
    finish_compute_backend_usage(required_states);
}

// Releases params-backend storage for `tensors`, subject to the non-forced rules of
// release_params_storage_blocks(); does nothing if any tensor cannot be resolved.
void ModelManager::release_params_backend_params(const std::vector<ggml_tensor*>& tensors) {
    if (tensors.empty()) {
        return;
    }

    std::vector<TensorState*> required_states;
    if (!resolve_required_tensor_states(tensors, required_states)) {
        return;
    }
    if (required_states.empty()) {
        return;
    }

    std::unordered_set<TensorState*> target_states(required_states.begin(), required_states.end());
    release_params_storage_blocks(false, &target_states);
}