mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-06-25 07:36:38 +00:00
Compare commits
4 Commits
0b8296915c
...
38b14adb67
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
38b14adb67 | ||
|
|
fd1a2794f3 | ||
|
|
db08b84607 | ||
|
|
686856edca |
@ -69,6 +69,12 @@ option(SD_BUILD_SHARED_GGML_LIB "sd: build ggml as a separate shared lib" O
|
|||||||
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
||||||
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
#option(SD_BUILD_SERVER "sd: build server example" ON)
|
||||||
|
|
||||||
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
set(CMAKE_C_STANDARD_REQUIRED true)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED true)
|
||||||
|
|
||||||
if(SD_CUDA)
|
if(SD_CUDA)
|
||||||
message("-- Use CUDA as backend stable-diffusion")
|
message("-- Use CUDA as backend stable-diffusion")
|
||||||
set(GGML_CUDA ON)
|
set(GGML_CUDA ON)
|
||||||
|
|||||||
@ -55,7 +55,7 @@ Context Options:
|
|||||||
then threads will be set to the number of CPU physical cores
|
then threads will be set to the number of CPU physical cores
|
||||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||||
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
|
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
|
||||||
graph splitting
|
graph splitting; -1 auto-detects free VRAM minus 1 GiB
|
||||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
||||||
when needed
|
when needed
|
||||||
|
|||||||
@ -397,7 +397,7 @@ ArgOptions SDContextParams::get_options() {
|
|||||||
options.float_options = {
|
options.float_options = {
|
||||||
{"",
|
{"",
|
||||||
"--max-vram",
|
"--max-vram",
|
||||||
"maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables graph splitting",
|
"maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables graph splitting; -1 auto-detects free VRAM minus 1 GiB",
|
||||||
&max_vram},
|
&max_vram},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -157,7 +157,7 @@ Context Options:
|
|||||||
then threads will be set to the number of CPU physical cores
|
then threads will be set to the number of CPU physical cores
|
||||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||||
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
|
--max-vram <float> maximum VRAM budget in GiB for graph-cut segmented execution. 0 disables
|
||||||
graph splitting
|
graph splitting; -1 auto-detects free VRAM minus 1 GiB
|
||||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM
|
||||||
when needed
|
when needed
|
||||||
|
|||||||
@ -205,7 +205,7 @@ typedef struct {
|
|||||||
bool chroma_use_t5_mask;
|
bool chroma_use_t5_mask;
|
||||||
int chroma_t5_mask_pad;
|
int chroma_t5_mask_pad;
|
||||||
bool qwen_image_zero_cond_t;
|
bool qwen_image_zero_cond_t;
|
||||||
float max_vram;
|
float max_vram; // GiB budget for graph-cut segmented param offload (0 = disabled, -1 = auto free VRAM minus 1 GiB)
|
||||||
} sd_ctx_params_t;
|
} sd_ctx_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|||||||
116
src/denoiser.hpp
116
src/denoiser.hpp
@ -824,45 +824,33 @@ static std::tuple<float, float, float> get_ancestral_step(float sigma_from,
|
|||||||
static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
|
static sd::Tensor<float> sample_euler_ancestral(denoise_cb_t model,
|
||||||
sd::Tensor<float> x,
|
sd::Tensor<float> x,
|
||||||
const std::vector<float>& sigmas,
|
const std::vector<float>& sigmas,
|
||||||
std::shared_ptr<RNG> rng,
|
std::shared_ptr<RNG> rng = nullptr,
|
||||||
float eta) {
|
bool is_flow_denoiser = false,
|
||||||
|
float eta = 0.f) {
|
||||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
int steps = static_cast<int>(sigmas.size()) - 1;
|
||||||
for (int i = 0; i < steps; i++) {
|
for (int i = 0; i < steps; i++) {
|
||||||
float sigma = sigmas[i];
|
float sigma = sigmas[i];
|
||||||
|
float sigma_to = sigmas[i + 1];
|
||||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||||
if (denoised_opt.empty()) {
|
if (denoised_opt.empty()) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||||
sd::Tensor<float> d = (x - denoised) / sigma;
|
if (sigma_to == 0.f) {
|
||||||
auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta);
|
x = denoised;
|
||||||
x += d * (sigma_down - sigmas[i]);
|
} else if (eta == 0.f) {
|
||||||
if (sigmas[i + 1] > 0) {
|
float sigma_ratio = sigma_to / sigma;
|
||||||
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
|
x = sigma_ratio * x + (1.0 - sigma_ratio) * denoised;
|
||||||
}
|
} else {
|
||||||
}
|
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma, sigma_to, eta, is_flow_denoiser);
|
||||||
return x;
|
float sigma_ratio = sigma_down / sigma;
|
||||||
}
|
x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised;
|
||||||
|
if (sigma_up > 0.f) {
|
||||||
static sd::Tensor<float> sample_euler_flow(denoise_cb_t model,
|
if (is_flow_denoiser) {
|
||||||
sd::Tensor<float> x,
|
x *= alpha_scale;
|
||||||
const std::vector<float>& sigmas,
|
}
|
||||||
std::shared_ptr<RNG> rng,
|
x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
|
||||||
float eta) {
|
}
|
||||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
|
||||||
for (int i = 0; i < steps; i++) {
|
|
||||||
float sigma = sigmas[i];
|
|
||||||
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
|
||||||
if (denoised_opt.empty()) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
sd::Tensor<float> denoised = std::move(denoised_opt);
|
|
||||||
auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma, sigmas[i + 1], eta);
|
|
||||||
float sigma_ratio = sigma_down / sigma;
|
|
||||||
x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised;
|
|
||||||
|
|
||||||
if (sigma_up > 0.0f) {
|
|
||||||
x = alpha_scale * x + sd::Tensor<float>::randn_like(x, rng) * sigma_up;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return x;
|
return x;
|
||||||
@ -1633,46 +1621,6 @@ static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
|
|
||||||
sd::Tensor<float> x,
|
|
||||||
const std::vector<float>& sigmas,
|
|
||||||
std::shared_ptr<RNG> rng,
|
|
||||||
float eta) {
|
|
||||||
int steps = static_cast<int>(sigmas.size()) - 1;
|
|
||||||
for (int i = 0; i < steps; i++) {
|
|
||||||
float sigma = sigmas[i];
|
|
||||||
float sigma_to = sigmas[i + 1];
|
|
||||||
|
|
||||||
auto model_output_opt = model(x, sigma, i + 1, nullptr);
|
|
||||||
if (model_output_opt.empty()) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
sd::Tensor<float> model_output = std::move(model_output_opt);
|
|
||||||
model_output = (x - model_output) * (1.0f / sigma);
|
|
||||||
|
|
||||||
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
|
|
||||||
float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f);
|
|
||||||
float beta_prod_t = 1.0f - alpha_prod_t;
|
|
||||||
|
|
||||||
sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) -
|
|
||||||
std::sqrt(beta_prod_t) * model_output) *
|
|
||||||
(1.0f / std::sqrt(alpha_prod_t));
|
|
||||||
|
|
||||||
float beta_prod_t_prev = 1.0f - alpha_prod_t_prev;
|
|
||||||
float variance = (beta_prod_t_prev / beta_prod_t) *
|
|
||||||
(1.0f - alpha_prod_t / alpha_prod_t_prev);
|
|
||||||
float std_dev_t = eta * std::sqrt(variance);
|
|
||||||
|
|
||||||
x = pred_original_sample +
|
|
||||||
std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * model_output;
|
|
||||||
|
|
||||||
if (eta > 0) {
|
|
||||||
x += std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor<float>::randn_like(x, rng);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
static sd::Tensor<float> sample_tcd(denoise_cb_t model,
|
static sd::Tensor<float> sample_tcd(denoise_cb_t model,
|
||||||
sd::Tensor<float> x,
|
sd::Tensor<float> x,
|
||||||
const std::vector<float>& sigmas,
|
const std::vector<float>& sigmas,
|
||||||
@ -1715,12 +1663,12 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
|
|||||||
int timestep_s = (int)floor((1 - eta) * prev_timestep);
|
int timestep_s = (int)floor((1 - eta) * prev_timestep);
|
||||||
float sigma = sigmas[i];
|
float sigma = sigmas[i];
|
||||||
|
|
||||||
auto model_output_opt = model(x, sigma, i + 1, nullptr);
|
auto denoised_opt = model(x, sigma, i + 1, nullptr);
|
||||||
if (model_output_opt.empty()) {
|
if (denoised_opt.empty()) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
sd::Tensor<float> model_output = std::move(model_output_opt);
|
sd::Tensor<float> denoised = std::move(denoised_opt);
|
||||||
model_output = (x - model_output) * (1.0f / sigma);
|
sd::Tensor<float> d = (x - denoised) / sigma;
|
||||||
|
|
||||||
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
|
float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f);
|
||||||
float beta_prod_t = 1.0f - alpha_prod_t;
|
float beta_prod_t = 1.0f - alpha_prod_t;
|
||||||
@ -1728,12 +1676,8 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
|
|||||||
float alpha_prod_s = static_cast<float>(alphas_cumprod[timestep_s]);
|
float alpha_prod_s = static_cast<float>(alphas_cumprod[timestep_s]);
|
||||||
float beta_prod_s = 1.0f - alpha_prod_s;
|
float beta_prod_s = 1.0f - alpha_prod_s;
|
||||||
|
|
||||||
sd::Tensor<float> pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) -
|
x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * denoised +
|
||||||
std::sqrt(beta_prod_t) * model_output) *
|
std::sqrt(beta_prod_s / alpha_prod_t_prev) * d;
|
||||||
(1.0f / std::sqrt(alpha_prod_t));
|
|
||||||
|
|
||||||
x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * pred_original_sample +
|
|
||||||
std::sqrt(beta_prod_s / alpha_prod_t_prev) * model_output;
|
|
||||||
|
|
||||||
if (eta > 0 && sigma_to > 0.0f) {
|
if (eta > 0 && sigma_to > 0.0f) {
|
||||||
x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x +
|
x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x +
|
||||||
@ -1804,10 +1748,7 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
|
|||||||
const char* extra_sample_args) {
|
const char* extra_sample_args) {
|
||||||
switch (method) {
|
switch (method) {
|
||||||
case EULER_A_SAMPLE_METHOD:
|
case EULER_A_SAMPLE_METHOD:
|
||||||
if (is_flow_denoiser)
|
return sample_euler_ancestral(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
|
||||||
return sample_euler_flow(model, std::move(x), sigmas, rng, eta);
|
|
||||||
else
|
|
||||||
return sample_euler_ancestral(model, std::move(x), sigmas, rng, eta);
|
|
||||||
case EULER_SAMPLE_METHOD:
|
case EULER_SAMPLE_METHOD:
|
||||||
return sample_euler(model, std::move(x), sigmas);
|
return sample_euler(model, std::move(x), sigmas);
|
||||||
case HEUN_SAMPLE_METHOD:
|
case HEUN_SAMPLE_METHOD:
|
||||||
@ -1836,7 +1777,8 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
|
|||||||
case ER_SDE_SAMPLE_METHOD:
|
case ER_SDE_SAMPLE_METHOD:
|
||||||
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
|
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
|
||||||
case DDIM_TRAILING_SAMPLE_METHOD:
|
case DDIM_TRAILING_SAMPLE_METHOD:
|
||||||
return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta);
|
// DDIM is equivalent to Euler Ancestral with the Simple scheduler
|
||||||
|
return sample_euler_ancestral(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
|
||||||
case TCD_SAMPLE_METHOD:
|
case TCD_SAMPLE_METHOD:
|
||||||
return sample_tcd(model, std::move(x), sigmas, rng, eta);
|
return sample_tcd(model, std::move(x), sigmas, rng, eta);
|
||||||
case EULER_CFG_PP_SAMPLE_METHOD:
|
case EULER_CFG_PP_SAMPLE_METHOD:
|
||||||
|
|||||||
@ -2732,6 +2732,9 @@ public:
|
|||||||
rebuild_params_tensor_set();
|
rebuild_params_tensor_set();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
LOG_DEBUG("%s skipping params allocation (no tensors)", get_desc().c_str());
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
params_buffer = ggml_backend_alloc_ctx_tensors(params_ctx, params_backend);
|
params_buffer = ggml_backend_alloc_ctx_tensors(params_ctx, params_backend);
|
||||||
if (params_buffer == nullptr) {
|
if (params_buffer == nullptr) {
|
||||||
|
|||||||
@ -16,6 +16,9 @@
|
|||||||
|
|
||||||
namespace sd::ggml_graph_cut {
|
namespace sd::ggml_graph_cut {
|
||||||
|
|
||||||
|
static constexpr double MAX_VRAM_BYTES_PER_GIB = 1024.0 * 1024.0 * 1024.0;
|
||||||
|
static constexpr size_t MAX_VRAM_AUTO_RESERVE_BYTES = 1024ULL * 1024ULL * 1024ULL;
|
||||||
|
|
||||||
static std::string graph_cut_tensor_display_name(const ggml_tensor* tensor) {
|
static std::string graph_cut_tensor_display_name(const ggml_tensor* tensor) {
|
||||||
if (tensor == nullptr) {
|
if (tensor == nullptr) {
|
||||||
return "<null>";
|
return "<null>";
|
||||||
@ -79,6 +82,58 @@ namespace sd::ggml_graph_cut {
|
|||||||
segment.output_bytes;
|
segment.output_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t max_vram_gib_to_bytes(float max_vram) {
|
||||||
|
if (max_vram <= 0.f) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return static_cast<size_t>(static_cast<double>(max_vram) * MAX_VRAM_BYTES_PER_GIB);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float max_vram_bytes_to_gib(size_t max_vram_bytes) {
|
||||||
|
return static_cast<float>(static_cast<double>(max_vram_bytes) / MAX_VRAM_BYTES_PER_GIB);
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t resolve_auto_max_vram_bytes(ggml_backend_t backend) {
|
||||||
|
if (backend == nullptr) {
|
||||||
|
LOG_WARN("--max-vram -1 requested, but no backend is available; disabling graph splitting");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
|
||||||
|
if (dev == nullptr) {
|
||||||
|
LOG_WARN("--max-vram -1 requested, but no backend device is available; disabling graph splitting");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
|
||||||
|
LOG_WARN("--max-vram -1 requested, but the main backend is CPU; disabling graph splitting");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t free_vram = 0;
|
||||||
|
size_t total_vram = 0;
|
||||||
|
ggml_backend_dev_memory(dev, &free_vram, &total_vram);
|
||||||
|
|
||||||
|
if (free_vram <= MAX_VRAM_AUTO_RESERVE_BYTES) {
|
||||||
|
LOG_WARN("--max-vram -1 requested, but free VRAM is %.2f GiB; reserving 1.00 GiB leaves no graph budget",
|
||||||
|
free_vram / MAX_VRAM_BYTES_PER_GIB);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t max_vram_bytes = free_vram - MAX_VRAM_AUTO_RESERVE_BYTES;
|
||||||
|
LOG_INFO("--max-vram -1 auto-detected %.2f GiB free VRAM (%.2f GiB total), reserving 1.00 GiB; using %.2f GiB",
|
||||||
|
free_vram / MAX_VRAM_BYTES_PER_GIB,
|
||||||
|
total_vram / MAX_VRAM_BYTES_PER_GIB,
|
||||||
|
max_vram_bytes / MAX_VRAM_BYTES_PER_GIB);
|
||||||
|
return max_vram_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
float resolve_max_vram_gib(float max_vram, ggml_backend_t backend) {
|
||||||
|
if (max_vram != -1.f) {
|
||||||
|
return max_vram;
|
||||||
|
}
|
||||||
|
return max_vram_bytes_to_gib(resolve_auto_max_vram_bytes(backend));
|
||||||
|
}
|
||||||
|
|
||||||
static Segment make_segment_seed(const Plan& plan,
|
static Segment make_segment_seed(const Plan& plan,
|
||||||
size_t start_segment_index,
|
size_t start_segment_index,
|
||||||
size_t end_segment_index) {
|
size_t end_segment_index) {
|
||||||
|
|||||||
@ -83,6 +83,8 @@ namespace sd::ggml_graph_cut {
|
|||||||
ggml_cgraph* gf,
|
ggml_cgraph* gf,
|
||||||
const Segment& segment,
|
const Segment& segment,
|
||||||
const char* log_desc);
|
const char* log_desc);
|
||||||
|
size_t max_vram_gib_to_bytes(float max_vram);
|
||||||
|
float resolve_max_vram_gib(float max_vram, ggml_backend_t backend);
|
||||||
Plan build_plan(ggml_backend_t backend,
|
Plan build_plan(ggml_backend_t backend,
|
||||||
ggml_cgraph* gf,
|
ggml_cgraph* gf,
|
||||||
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
|
const std::unordered_set<const ggml_tensor*>& params_tensor_set,
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#include "ggml_extend.hpp"
|
#include "ggml_extend.hpp"
|
||||||
|
#include "ggml_graph_cut.h"
|
||||||
|
|
||||||
#include "model.h"
|
#include "model.h"
|
||||||
#include "rng.hpp"
|
#include "rng.hpp"
|
||||||
@ -209,6 +210,7 @@ public:
|
|||||||
ggml_log_set(ggml_log_callback_default, nullptr);
|
ggml_log_set(ggml_log_callback_default, nullptr);
|
||||||
|
|
||||||
init_backend();
|
init_backend();
|
||||||
|
max_vram = sd::ggml_graph_cut::resolve_max_vram_gib(max_vram, backend);
|
||||||
|
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|
||||||
@ -426,9 +428,7 @@ public:
|
|||||||
|
|
||||||
bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu;
|
bool clip_on_cpu = sd_ctx_params->keep_clip_on_cpu;
|
||||||
|
|
||||||
const size_t max_graph_vram_bytes = max_vram <= 0.f
|
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(max_vram);
|
||||||
? 0
|
|
||||||
: static_cast<size_t>(static_cast<double>(max_vram) * 1024.0 * 1024.0 * 1024.0);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
clip_backend = backend;
|
clip_backend = backend;
|
||||||
@ -3597,9 +3597,7 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s
|
|||||||
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
|
hires_upscaler = std::make_unique<UpscalerGGML>(sd_ctx->sd->n_threads,
|
||||||
false,
|
false,
|
||||||
request.hires.upscale_tile_size);
|
request.hires.upscale_tile_size);
|
||||||
const size_t max_graph_vram_bytes = sd_ctx->sd->max_vram <= 0.f
|
const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram);
|
||||||
? 0
|
|
||||||
: static_cast<size_t>(static_cast<double>(sd_ctx->sd->max_vram) * 1024.0 * 1024.0 * 1024.0);
|
|
||||||
hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes);
|
||||||
if (!hires_upscaler->load_from_file(request.hires.model_path,
|
if (!hires_upscaler->load_from_file(request.hires.model_path,
|
||||||
sd_ctx->sd->offload_params_to_cpu,
|
sd_ctx->sd->offload_params_to_cpu,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user