mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-09 15:56:39 +00:00
refactor: call CPU backend functions dynamically (#1591)
Co-authored-by: leejet <leejet714@gmail.com>
This commit is contained in:
parent
f8935d6f25
commit
02f06370a7
@ -8,7 +8,7 @@
|
|||||||
#include "model_io/safetensors_io.h"
|
#include "model_io/safetensors_io.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
#include "ggml-cpu.h"
|
#include "ggml_extend_backend.h"
|
||||||
|
|
||||||
static ggml_type get_export_tensor_type(ModelLoader& model_loader,
|
static ggml_type get_export_tensor_type(ModelLoader& model_loader,
|
||||||
const TensorStorage& tensor_storage,
|
const TensorStorage& tensor_storage,
|
||||||
@ -103,7 +103,7 @@ bool convert(const char* input_path,
|
|||||||
bool output_is_safetensors = ends_with(output_path, ".safetensors");
|
bool output_is_safetensors = ends_with(output_path, ".safetensors");
|
||||||
TensorTypeRules type_rules = parse_tensor_type_rules(tensor_type_rules);
|
TensorTypeRules type_rules = parse_tensor_type_rules(tensor_type_rules);
|
||||||
|
|
||||||
auto backend = ggml_backend_cpu_init();
|
auto backend = sd_backend_cpu_init();
|
||||||
size_t mem_size = 1 * 1024 * 1024; // for padding
|
size_t mem_size = 1 * 1024 * 1024; // for padding
|
||||||
mem_size += model_loader.get_tensor_storage_map().size() * ggml_tensor_overhead();
|
mem_size += model_loader.get_tensor_storage_map().size() * ggml_tensor_overhead();
|
||||||
mem_size += model_loader.get_params_mem_size(backend, type);
|
mem_size += model_loader.get_params_mem_size(backend, type);
|
||||||
|
|||||||
@ -1567,7 +1567,7 @@ namespace Flux {
|
|||||||
|
|
||||||
static void load_from_file_and_test(const std::string& file_path) {
|
static void load_from_file_and_test(const std::string& file_path) {
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_COUNT;
|
ggml_type model_data_type = GGML_TYPE_COUNT;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
@ -1442,7 +1442,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_group_norm(ggml_context* ctx,
|
|||||||
|
|
||||||
__STATIC_INLINE__ void ggml_ext_backend_tensor_get_and_sync(ggml_backend_t backend, const ggml_tensor* tensor, void* data, size_t offset, size_t size) {
|
__STATIC_INLINE__ void ggml_ext_backend_tensor_get_and_sync(ggml_backend_t backend, const ggml_tensor* tensor, void* data, size_t offset, size_t size) {
|
||||||
if ((sd_backend_is(backend, "ROCm") || sd_backend_is(backend, "CUDA") || sd_backend_is(backend, "SYCL")) &&
|
if ((sd_backend_is(backend, "ROCm") || sd_backend_is(backend, "CUDA") || sd_backend_is(backend, "SYCL")) &&
|
||||||
!ggml_backend_is_cpu(backend)) {
|
!sd_backend_is_cpu(backend)) {
|
||||||
ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
|
ggml_backend_tensor_get_async(backend, tensor, data, offset, size);
|
||||||
ggml_backend_synchronize(backend);
|
ggml_backend_synchronize(backend);
|
||||||
return;
|
return;
|
||||||
@ -1899,7 +1899,7 @@ protected:
|
|||||||
LOG_DEBUG("%s compute buffer size: %.2f MB(%s)",
|
LOG_DEBUG("%s compute buffer size: %.2f MB(%s)",
|
||||||
get_desc().c_str(),
|
get_desc().c_str(),
|
||||||
compute_buffer_size / 1024.0 / 1024.0,
|
compute_buffer_size / 1024.0 / 1024.0,
|
||||||
ggml_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM");
|
sd_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1986,7 +1986,7 @@ protected:
|
|||||||
LOG_DEBUG("%s cache backend buffer size = % 6.2f MB(%s) (%i tensors)",
|
LOG_DEBUG("%s cache backend buffer size = % 6.2f MB(%s) (%i tensors)",
|
||||||
get_desc().c_str(),
|
get_desc().c_str(),
|
||||||
cache_buffer_size / (1024.f * 1024.f),
|
cache_buffer_size / (1024.f * 1024.f),
|
||||||
ggml_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM",
|
sd_backend_is_cpu(runtime_backend) ? "RAM" : "VRAM",
|
||||||
num_tensors);
|
num_tensors);
|
||||||
if (old_cache_buffer != nullptr) {
|
if (old_cache_buffer != nullptr) {
|
||||||
ggml_backend_buffer_free(old_cache_buffer);
|
ggml_backend_buffer_free(old_cache_buffer);
|
||||||
@ -2293,13 +2293,13 @@ protected:
|
|||||||
max_graph_vram_bytes > 0 &&
|
max_graph_vram_bytes > 0 &&
|
||||||
plan.segments.size() > 1 &&
|
plan.segments.size() > 1 &&
|
||||||
params_backend != runtime_backend &&
|
params_backend != runtime_backend &&
|
||||||
!ggml_backend_is_cpu(runtime_backend);
|
!sd_backend_is_cpu(runtime_backend);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool can_attempt_graph_cut_segmented_compute() const {
|
bool can_attempt_graph_cut_segmented_compute() const {
|
||||||
return max_graph_vram_bytes > 0 &&
|
return max_graph_vram_bytes > 0 &&
|
||||||
params_backend != runtime_backend &&
|
params_backend != runtime_backend &&
|
||||||
!ggml_backend_is_cpu(runtime_backend);
|
!sd_backend_is_cpu(runtime_backend);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool resolve_graph_cut_plan(ggml_cgraph* gf,
|
bool resolve_graph_cut_plan(ggml_cgraph* gf,
|
||||||
@ -2436,8 +2436,8 @@ protected:
|
|||||||
int64_t t_copy_begin = ggml_time_ms();
|
int64_t t_copy_begin = ggml_time_ms();
|
||||||
copy_data_to_backend_tensor(gf, !preserve_backend_tensor_data_map);
|
copy_data_to_backend_tensor(gf, !preserve_backend_tensor_data_map);
|
||||||
int64_t t_copy_end = ggml_time_ms();
|
int64_t t_copy_end = ggml_time_ms();
|
||||||
if (ggml_backend_is_cpu(runtime_backend)) {
|
if (sd_backend_is_cpu(runtime_backend)) {
|
||||||
ggml_backend_cpu_set_n_threads(runtime_backend, n_threads);
|
sd_backend_cpu_set_n_threads(runtime_backend, n_threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t t_compute_begin = ggml_time_ms();
|
int64_t t_compute_begin = ggml_time_ms();
|
||||||
@ -2679,7 +2679,7 @@ public:
|
|||||||
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
|
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
|
||||||
get_desc().c_str(),
|
get_desc().c_str(),
|
||||||
params_buffer_size / (1024.f * 1024.f),
|
params_buffer_size / (1024.f * 1024.f),
|
||||||
ggml_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
|
sd_backend_is_cpu(params_backend) ? "RAM" : "VRAM",
|
||||||
num_tensors);
|
num_tensors);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -2746,7 +2746,7 @@ public:
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
// it's performing a compute, check if backend isn't cpu
|
// it's performing a compute, check if backend isn't cpu
|
||||||
if (!ggml_backend_is_cpu(runtime_backend) && (tensor->buffer == nullptr || ggml_backend_buffer_is_host(tensor->buffer))) {
|
if (!sd_backend_is_cpu(runtime_backend) && (tensor->buffer == nullptr || ggml_backend_buffer_is_host(tensor->buffer))) {
|
||||||
// pass input tensors to gpu memory
|
// pass input tensors to gpu memory
|
||||||
auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
|
auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "stable-diffusion.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
static std::string trim_copy(const std::string& value) {
|
static std::string trim_copy(const std::string& value) {
|
||||||
@ -300,6 +301,61 @@ static ggml_backend_t init_named_backend(const std::string& name) {
|
|||||||
return ggml_backend_init_by_name(resolved.c_str(), nullptr);
|
return ggml_backend_init_by_name(resolved.c_str(), nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether `backend` is bound to a device whose type is
// GGML_BACKEND_DEVICE_TYPE_CPU. The check goes through the generic device
// API rather than a CPU-backend-specific symbol.
//
// Returns false for a null backend or for a backend with no associated device.
bool sd_backend_is_cpu(ggml_backend_t backend) {
    if (backend != nullptr) {
        auto device = ggml_backend_get_device(backend);
        if (device != nullptr) {
            return ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_CPU;
        }
    }
    return false;
}
|
||||||
|
|
||||||
|
// Creates a backend for the first device of type GGML_BACKEND_DEVICE_TYPE_CPU.
//
// ggml_backend_load_all_once() is invoked first so that the backends are
// loaded before the lookup by device type.
//
// Returns the handle produced by ggml_backend_init_by_type() unchanged.
// NOTE(review): presumably this is nullptr when no CPU device is registered;
// callers should confirm and check the result.
ggml_backend_t sd_backend_cpu_init() {
    ggml_backend_load_all_once();
    auto cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    return cpu_backend;
}
|
||||||
|
|
||||||
|
// Sets the thread count of a CPU backend by resolving the
// "ggml_backend_set_n_threads" entry point from the backend's registry at
// run time instead of calling a CPU-backend symbol directly.
//
// backend:   backend to configure; may be null.
// n_threads: value forwarded unchanged to the resolved function.
//
// Returns true only when the backend is a CPU backend and the entry point was
// found and called. Returns false when the backend is null, has no device, is
// not of CPU device type, or the registry does not expose the entry point.
bool sd_backend_cpu_set_n_threads(ggml_backend_t backend, int n_threads) {
    if (backend == nullptr) {
        return false;
    }

    auto device = ggml_backend_get_device(backend);
    if (device == nullptr || ggml_backend_dev_type(device) != GGML_BACKEND_DEVICE_TYPE_CPU) {
        return false;
    }

    // Look the setter up by name in the registry that owns this device.
    auto registry    = ggml_backend_dev_backend_reg(device);
    auto set_threads = (ggml_backend_set_n_threads_t)ggml_backend_reg_get_proc_address(registry, "ggml_backend_set_n_threads");
    if (set_threads == nullptr) {
        return false;
    }

    set_threads(backend, n_threads);
    return true;
}
|
||||||
|
|
||||||
|
const char* sd_get_system_info() {
|
||||||
|
static std::string cache_info = []() -> std::string {
|
||||||
|
ggml_backend_load_all_once();
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << "System Info: \n";
|
||||||
|
auto dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
|
||||||
|
if (dev != nullptr) {
|
||||||
|
auto reg = ggml_backend_dev_backend_reg(dev);
|
||||||
|
auto ggml_backend_get_features_fn = (ggml_backend_get_features_t)ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
|
||||||
|
if (ggml_backend_get_features_fn != nullptr) {
|
||||||
|
ggml_backend_feature* feat = ggml_backend_get_features_fn(reg);
|
||||||
|
while (feat->name && feat->value) {
|
||||||
|
ss << " " << feat->name << " = " << feat->value << " | ";
|
||||||
|
feat++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG_WARN("unable to get CPU features");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG_WARN("unable to get CPU features");
|
||||||
|
}
|
||||||
|
return ss.str();
|
||||||
|
}();
|
||||||
|
return cache_info.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
static ggml_backend_t sd_get_default_backend() {
|
static ggml_backend_t sd_get_default_backend() {
|
||||||
ggml_backend_load_all_once();
|
ggml_backend_load_all_once();
|
||||||
static std::once_flag once;
|
static std::once_flag once;
|
||||||
@ -349,10 +405,10 @@ static ggml_backend_t sd_get_default_backend() {
|
|||||||
|
|
||||||
if (!backend) {
|
if (!backend) {
|
||||||
LOG_WARN("loading CPU backend");
|
LOG_WARN("loading CPU backend");
|
||||||
backend = ggml_backend_cpu_init();
|
backend = sd_backend_cpu_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ggml_backend_is_cpu(backend)) {
|
if (sd_backend_is_cpu(backend)) {
|
||||||
LOG_DEBUG("Using CPU backend");
|
LOG_DEBUG("Using CPU backend");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -452,11 +508,11 @@ ggml_backend_t SDBackendManager::params_backend(SDBackendModule module) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool SDBackendManager::runtime_backend_is_cpu(SDBackendModule module) {
|
bool SDBackendManager::runtime_backend_is_cpu(SDBackendModule module) {
|
||||||
return ggml_backend_is_cpu(runtime_backend(module));
|
return sd_backend_is_cpu(runtime_backend(module));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SDBackendManager::params_backend_is_cpu(SDBackendModule module) {
|
bool SDBackendManager::params_backend_is_cpu(SDBackendModule module) {
|
||||||
return ggml_backend_is_cpu(params_backend(module));
|
return sd_backend_is_cpu(params_backend(module));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SDBackendManager::runtime_backend_supports_host_buffer(SDBackendModule module) {
|
bool SDBackendManager::runtime_backend_supports_host_buffer(SDBackendModule module) {
|
||||||
@ -464,7 +520,7 @@ bool SDBackendManager::runtime_backend_supports_host_buffer(SDBackendModule modu
|
|||||||
if (backend == nullptr) {
|
if (backend == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (ggml_backend_is_cpu(backend)) {
|
if (sd_backend_is_cpu(backend)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
|
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
|
||||||
|
|||||||
@ -8,7 +8,6 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
#include "ggml-cpu.h"
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
enum class SDBackendModule {
|
enum class SDBackendModule {
|
||||||
@ -72,6 +71,9 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
bool sd_backend_is(ggml_backend_t backend, const std::string& name);
|
bool sd_backend_is(ggml_backend_t backend, const std::string& name);
|
||||||
|
bool sd_backend_is_cpu(ggml_backend_t backend);
|
||||||
|
ggml_backend_t sd_backend_cpu_init();
|
||||||
|
bool sd_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
|
||||||
const char* sd_backend_module_name(SDBackendModule module);
|
const char* sd_backend_module_name(SDBackendModule module);
|
||||||
void ggml_ext_im_set_f32_1d(const struct ggml_tensor* tensor, int i, float value);
|
void ggml_ext_im_set_f32_1d(const struct ggml_tensor* tensor, int i, float value);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -2041,7 +2041,7 @@ namespace LLM {
|
|||||||
static void load_from_file_and_test(const std::string& file_path) {
|
static void load_from_file_and_test(const std::string& file_path) {
|
||||||
// cpu f16: pass
|
// cpu f16: pass
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_COUNT;
|
ggml_type model_data_type = GGML_TYPE_COUNT;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
@ -772,7 +772,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ggml_tensor* original_tensor = model_tensor;
|
ggml_tensor* original_tensor = model_tensor;
|
||||||
if (!ggml_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
|
if (!sd_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
|
||||||
model_tensor = ggml_dup_tensor(compute_ctx, model_tensor);
|
model_tensor = ggml_dup_tensor(compute_ctx, model_tensor);
|
||||||
set_backend_tensor_data(model_tensor, original_tensor->data);
|
set_backend_tensor_data(model_tensor, original_tensor->data);
|
||||||
}
|
}
|
||||||
@ -786,7 +786,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
final_tensor = ggml_add_inplace(compute_ctx, model_tensor, diff);
|
final_tensor = ggml_add_inplace(compute_ctx, model_tensor, diff);
|
||||||
}
|
}
|
||||||
ggml_build_forward_expand(gf, final_tensor);
|
ggml_build_forward_expand(gf, final_tensor);
|
||||||
if (!ggml_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
|
if (!sd_backend_is_cpu(runtime_backend) && ggml_backend_buffer_is_host(original_tensor->buffer)) {
|
||||||
original_tensor_to_final_tensor[original_tensor] = final_tensor;
|
original_tensor_to_final_tensor[original_tensor] = final_tensor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1052,7 +1052,7 @@ namespace LTXV {
|
|||||||
static void load_from_file_and_test(const std::string& model_path,
|
static void load_from_file_and_test(const std::string& model_path,
|
||||||
const std::string& input_path,
|
const std::string& input_path,
|
||||||
const std::string& prefix = "") {
|
const std::string& prefix = "") {
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
|
LOG_INFO("loading ltx audio vae from '%s'", model_path.c_str());
|
||||||
|
|
||||||
|
|||||||
@ -1517,7 +1517,7 @@ struct LTXVideoVAE : public VAE {
|
|||||||
static void load_from_file_and_test(const std::string& model_path,
|
static void load_from_file_and_test(const std::string& model_path,
|
||||||
const std::string& input_path) {
|
const std::string& input_path) {
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
|
LOG_INFO("loading ltx vae from '%s'", model_path.c_str());
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
@ -1995,7 +1995,7 @@ namespace LTXV {
|
|||||||
const std::string& audio_x_path = "",
|
const std::string& audio_x_path = "",
|
||||||
const std::string& audio_timesteps_path = "") {
|
const std::string& audio_timesteps_path = "") {
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
LOG_INFO("loading ltxav from '%s'", model_path.c_str());
|
LOG_INFO("loading ltxav from '%s'", model_path.c_str());
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
@ -947,7 +947,7 @@ struct MMDiTRunner : public DiffusionModelRunner {
|
|||||||
|
|
||||||
static void load_from_file_and_test(const std::string& file_path) {
|
static void load_from_file_and_test(const std::string& file_path) {
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, backend);
|
std::shared_ptr<MMDiTRunner> mmdit = std::make_shared<MMDiTRunner>(backend, backend);
|
||||||
{
|
{
|
||||||
|
|||||||
@ -683,7 +683,7 @@ namespace Qwen {
|
|||||||
// cuda q8: pass
|
// cuda q8: pass
|
||||||
// cuda q8 fa: pass
|
// cuda q8 fa: pass
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
@ -1108,7 +1108,7 @@ public:
|
|||||||
if (module_backend == nullptr) {
|
if (module_backend == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (ggml_backend_is_cpu(module_backend)) {
|
if (sd_backend_is_cpu(module_backend)) {
|
||||||
total_params_ram_size += size;
|
total_params_ram_size += size;
|
||||||
} else {
|
} else {
|
||||||
total_params_vram_size += size;
|
total_params_vram_size += size;
|
||||||
@ -1123,7 +1123,7 @@ public:
|
|||||||
if (module_backend == nullptr) {
|
if (module_backend == nullptr) {
|
||||||
return "N/A";
|
return "N/A";
|
||||||
}
|
}
|
||||||
return ggml_backend_is_cpu(module_backend) ? "RAM" : "VRAM";
|
return sd_backend_is_cpu(module_backend) ? "RAM" : "VRAM";
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!add_params_memory(clip_params_mem_size, SDBackendModule::TE) ||
|
if (!add_params_memory(clip_params_mem_size, SDBackendModule::TE) ||
|
||||||
|
|||||||
@ -563,7 +563,7 @@ struct T5Embedder {
|
|||||||
// cuda f32: pass
|
// cuda f32: pass
|
||||||
// cuda q8_0: pass
|
// cuda q8_0: pass
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
29
src/util.cpp
29
src/util.cpp
@ -25,9 +25,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "ggml-backend.h"
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "ggml_extend_backend.h"
|
|
||||||
#include "stable-diffusion.h"
|
#include "stable-diffusion.h"
|
||||||
|
|
||||||
bool ends_with(const std::string& str, const std::string& ending) {
|
bool ends_with(const std::string& str, const std::string& ending) {
|
||||||
@ -974,30 +972,3 @@ std::vector<std::pair<std::string, float>> split_quotation_attention(
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// namespace is needed to avoid conflicts with ggml_backend_extend.hpp
|
|
||||||
namespace ggml_cpu {
|
|
||||||
#include "ggml-cpu.h"
|
|
||||||
}
|
|
||||||
|
|
||||||
const char* sd_get_system_info() {
|
|
||||||
using namespace ggml_cpu;
|
|
||||||
static char buffer[1024];
|
|
||||||
std::stringstream ss;
|
|
||||||
ss << "System Info: \n";
|
|
||||||
ss << " SSE3 = " << ggml_cpu_has_sse3() << " | ";
|
|
||||||
ss << " AVX = " << ggml_cpu_has_avx() << " | ";
|
|
||||||
ss << " AVX2 = " << ggml_cpu_has_avx2() << " | ";
|
|
||||||
ss << " AVX512 = " << ggml_cpu_has_avx512() << " | ";
|
|
||||||
ss << " AVX512_VBMI = " << ggml_cpu_has_avx512_vbmi() << " | ";
|
|
||||||
ss << " AVX512_VNNI = " << ggml_cpu_has_avx512_vnni() << " | ";
|
|
||||||
ss << " FMA = " << ggml_cpu_has_fma() << " | ";
|
|
||||||
ss << " NEON = " << ggml_cpu_has_neon() << " | ";
|
|
||||||
ss << " ARM_FMA = " << ggml_cpu_has_arm_fma() << " | ";
|
|
||||||
ss << " F16C = " << ggml_cpu_has_f16c() << " | ";
|
|
||||||
ss << " FP16_VA = " << ggml_cpu_has_fp16_va() << " | ";
|
|
||||||
ss << " WASM_SIMD = " << ggml_cpu_has_wasm_simd() << " | ";
|
|
||||||
ss << " VSX = " << ggml_cpu_has_vsx() << " | ";
|
|
||||||
snprintf(buffer, sizeof(buffer), "%s", ss.str().c_str());
|
|
||||||
return buffer;
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1328,7 +1328,7 @@ namespace WAN {
|
|||||||
|
|
||||||
static void load_from_file_and_test(const std::string& file_path) {
|
static void load_from_file_and_test(const std::string& file_path) {
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, backend, String2TensorStorage{}, "", false, VERSION_WAN2_2_TI2V);
|
std::shared_ptr<WanVAERunner> vae = std::make_shared<WanVAERunner>(backend, backend, String2TensorStorage{}, "", false, VERSION_WAN2_2_TI2V);
|
||||||
{
|
{
|
||||||
@ -2348,7 +2348,7 @@ namespace WAN {
|
|||||||
|
|
||||||
static void load_from_file_and_test(const std::string& file_path) {
|
static void load_from_file_and_test(const std::string& file_path) {
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_F16;
|
ggml_type model_data_type = GGML_TYPE_F16;
|
||||||
LOG_INFO("loading from '%s'", file_path.c_str());
|
LOG_INFO("loading from '%s'", file_path.c_str());
|
||||||
|
|
||||||
|
|||||||
@ -615,7 +615,7 @@ namespace ZImage {
|
|||||||
// cuda q8: pass
|
// cuda q8: pass
|
||||||
// cuda q8 fa: pass
|
// cuda q8 fa: pass
|
||||||
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
// ggml_backend_t backend = ggml_backend_cuda_init(0);
|
||||||
ggml_backend_t backend = ggml_backend_cpu_init();
|
ggml_backend_t backend = sd_backend_cpu_init();
|
||||||
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
ggml_type model_data_type = GGML_TYPE_Q8_0;
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user