// Declares ModelManager, a RunnerWeightManager implementation that keeps
// per-tensor bookkeeping (TensorState) plus two kinds of backing blocks
// (ParamsStorageBlock, ComputeStagingBlock) for registered parameter tensors.
//
// NOTE(review): this copy of the header looks like it went through a lossy
// text extraction: seven `#include` directives below have no header name, and
// every angle-bracket argument list (on std::vector / std::map / std::set /
// std::unordered_set and after both `template` keywords) is missing. Restore
// those from the original file before trying to compile; the comments here do
// not attempt to guess the missing types.
//
// NOTE(review): the include-guard macro contains double underscores; such
// identifiers are reserved for the implementation in C++. Consider renaming
// (e.g. MODEL_MANAGER_H) or `#pragma once` once the rest of the project agrees.
#ifndef __MODEL_MANAGER_H__
#define __MODEL_MANAGER_H__
// NOTE(review): the next seven directives are missing their header names.
#include
#include
#include
#include
#include
#include
#include
#include "model_loader.h"
#include "weight_manager.h"

class ModelManager : public RunnerWeightManager {
public:
    // Selects how a registered tensor is kept resident.
    // NOTE(review): presumably Disk = reload from the model file on demand and
    // ParamBackend = keep a copy on the params backend; confirm in the .cpp.
    enum class ResidencyMode {
        Disk,
        ParamBackend,
    };

    // Description of one LoRA to apply; passed in via set_loras().
    struct LoraSpec {
        std::string path;                       // LoRA location (presumably a file path; confirm)
        float multiplier = 1.0f;                // scale factor, defaults to 1.0
        bool is_high_noise = false;             // NOTE(review): meaning not visible here
        std::string tensor_name_prefix_filter;  // presumably restricts the LoRA to matching tensor names; confirm
        bool required = false;                  // presumably: failing to load is an error when true; confirm
    };

private:
    // Bookkeeping record for a single registered parameter tensor.
    struct TensorState {
        std::string name;
        ggml_tensor* tensor = nullptr;
        std::string desc;
        ResidencyMode residency_mode = ResidencyMode::ParamBackend;
        ggml_backend_t compute_backend = nullptr;
        ggml_backend_t params_backend = nullptr;
        bool metadata_validated = false;
        // presumably the number of outstanding prepare_params() users; confirm
        int active_prepare_count = 0;
        bool loaded_to_params_backend = false;
        bool staged_to_compute_backend = false;
        // Defaults to UINT64_MAX while current_lora_epoch_ starts at 0, so a
        // fresh state never matches the current epoch.
        // NOTE(review): looks like a "no LoRA applied yet" sentinel; confirm.
        uint64_t applied_lora_epoch = UINT64_MAX;
    };

    // A backend buffer on the params side together with the states it serves.
    // NOTE(review): element types of both vectors were stripped from this copy.
    struct ParamsStorageBlock {
        ggml_backend_buffer_t buffer = nullptr;
        std::vector mmap_tensor_stores;
        std::vector states;
    };

    // A backend buffer and ggml context used for staging on a compute backend.
    // NOTE(review): element type of staged_tensors was stripped from this copy.
    struct ComputeStagingBlock {
        ggml_backend_t compute_backend = nullptr;
        ggml_backend_buffer_t buffer = nullptr;
        ggml_context* staging_ctx = nullptr;
        std::vector> staged_tensors;
    };

    // --- Data members -----------------------------------------------------
    ModelLoader model_loader_;  // exposed through loader()
    std::vector> tensor_states_;
    std::map tensor_states_by_name_;
    std::vector> params_storage_blocks_;
    std::vector> compute_staging_blocks_;
    std::set common_ignore_tensors_;
    std::vector loras_;
    SDVersion lora_version_ = VERSION_COUNT;
    uint64_t current_lora_epoch_ = 0;
    int n_threads_ = 0;         // written by set_n_threads()
    bool enable_mmap_ = false;  // written by set_enable_mmap()

    // --- Private helpers (declared here, defined elsewhere) ----------------
    // Their behaviour is not visible in this header; names are the only hint.
    void finish_compute_backend_usage(const std::vector& states);
    void release_all();
    bool resolve_required_tensor_states(const std::vector& tensors,
                                        std::vector& required_states) const;
    bool should_ignore(const TensorState& state) const;
    bool is_optional_missing_tensor(const std::string& name) const;
    bool validate_tensor(const TensorState& state) const;
    bool load_tensors_to_params_backend(const std::vector& states);
    bool apply_loras_to_params(const std::vector& states);
    bool mmap_params(const std::vector& states,
                     std::vector& created_storage_blocks);
    bool can_mmap_storage(const TensorState& state) const;
    bool alloc_params_buffers(const std::vector& states,
                              std::vector& created_storage_blocks);
    bool load_tensors(const std::vector& states);
    bool stage_tensors_to_compute_backend(const std::vector& states);
    ggml_backend_buffer_type_t params_buffer_type_for(const TensorState& state) const;
    // Both release_* helpers default to force = false and no target set.
    void release_compute_staging_blocks(bool force = false,
                                        const std::unordered_set* target_states = nullptr);
    void release_params_storage_blocks(bool force = false,
                                       const std::unordered_set* target_states = nullptr);
    void free_compute_staging_block(ComputeStagingBlock& block);
    void free_params_storage_block(ParamsStorageBlock& block);
    void erase_params_storage_block(ParamsStorageBlock* block);
    void reset_lora_applied_params();

public:
    // Overrides the base-class destructor; defined out of line.
    ~ModelManager() override;

    // Mutable / read-only access to the owned ModelLoader.
    ModelLoader& loader() { return model_loader_; }
    const ModelLoader& loader() const { return model_loader_; }

    // Stores the thread count and forwards the same value to the loader.
    void set_n_threads(int n_threads) {
        n_threads_ = n_threads;
        model_loader_.set_n_threads(n_threads);
    }

    // Stores the mmap flag; it is only recorded here, not acted on.
    void set_enable_mmap(bool enable_mmap) { enable_mmap_ = enable_mmap; }

    void set_common_ignore_tensors(std::set ignore_tensors);
    void set_loras(std::vector loras, SDVersion version);
    std::set tensor_names() const;

    // Registers a named set of tensors under `desc`.
    // `registered_tensor_size` is an optional out-pointer (defaults to nullptr).
    // NOTE(review): key/value types of the map were stripped from this copy.
    bool register_param_tensors(const std::string& desc,
                                std::map tensors,
                                ResidencyMode residency_mode,
                                ggml_backend_t compute_backend,
                                ggml_backend_t params_backend,
                                size_t* registered_tensor_size = nullptr);

    // Convenience wrapper: asks `runner` for its parameter tensors via
    // runner.get_param_tensors(tensors) and forwards them (moved) to
    // register_param_tensors() with the remaining arguments unchanged.
    // NOTE(review): the template parameter list (presumably declaring Runner)
    // was stripped from this copy.
    template bool register_runner_params(const std::string& desc,
                                         Runner& runner,
                                         ResidencyMode residency_mode,
                                         ggml_backend_t compute_backend,
                                         ggml_backend_t params_backend,
                                         size_t* registered_tensor_size = nullptr) {
        std::map tensors;
        runner.get_param_tensors(tensors);
        return register_param_tensors(desc, std::move(tensors), residency_mode, compute_backend, params_backend, registered_tensor_size);
    }

    // Same as the overload above, but passes `prefix` through to
    // runner.get_param_tensors(tensors, prefix).
    template bool register_runner_params(const std::string& desc,
                                         Runner& runner,
                                         const std::string& prefix,
                                         ResidencyMode residency_mode,
                                         ggml_backend_t compute_backend,
                                         ggml_backend_t params_backend,
                                         size_t* registered_tensor_size = nullptr) {
        std::map tensors;
        runner.get_param_tensors(tensors, prefix);
        return register_param_tensors(desc, std::move(tensors), residency_mode, compute_backend, params_backend, registered_tensor_size);
    }

    bool validate_registered_tensors();
    bool load_all_params_eagerly();

    // Overrides of base-class virtuals (RunnerWeightManager is the only base
    // visible here); defined out of line.
    bool prepare_params(const std::vector& tensors) override;
    void release_compute_backend_params(const std::vector& tensors) override;
    void release_params_backend_params(const std::vector& tensors) override;
};

#endif // __MODEL_MANAGER_H__