stable-diffusion.cpp (mirror of https://github.com/leejet/stable-diffusion.cpp.git)

commit e3702585cb (parent a7d6d296c7)

feat: added prediction argument (#334)
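This commit adds a user-facing override for the model's prediction type: a new --prediction argument in the example CLI and a prediction_t field in sd_ctx_params_t in the library API. When the override is set, the requested parameterization (eps, v, edm_v, sd3_flow or flux_flow) is used directly when the denoiser is built; when it is left at DEFAULT_PRED, the previous behaviour of inferring the parameterization from the model version and tensor names is preserved.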
@@ -358,6 +358,7 @@ arguments:
   --rng {std_default, cuda}          RNG (default: cuda)
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
   -b, --batch-count COUNT            number of images to generate
+  --prediction {eps, v, edm_v, sd3_flow, flux_flow} Prediction type override
   --clip-skip N                      ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
                                      <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
   --vae-tiling                       process vae in tiles to reduce memory usage
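For example, appending --prediction v to an otherwise unchanged invocation of the example CLI forces v-prediction even if the checkpoint would not be auto-detected as a v-prediction model, while an unrecognized value such as --prediction foo is rejected at argument-parsing time with "error: invalid prediction type foo" (see the on_prediction_arg handler further down).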
@@ -84,6 +84,7 @@ struct SDParams {
 
     std::string prompt;
     std::string negative_prompt;
+
     int clip_skip = -1; // <= 0 represents unspecified
     int width = 512;
     int height = 512;
@@ -127,6 +128,8 @@ struct SDParams {
     int chroma_t5_mask_pad = 1;
     float flow_shift = INFINITY;
 
+    prediction_t prediction = DEFAULT_PRED;
+
     sd_tiling_params_t vae_tiling_params = {false, 0, 0, 0.5f, 0.0f, 0.0f};
 
     SDParams() {
@@ -188,6 +191,7 @@ void print_params(SDParams params) {
     printf(" sample_params: %s\n", SAFE_STR(sample_params_str));
     printf(" high_noise_sample_params: %s\n", SAFE_STR(high_noise_sample_params_str));
     printf(" moe_boundary: %.3f\n", params.moe_boundary);
+    printf(" prediction: %s\n", sd_prediction_name(params.prediction));
     printf(" flow_shift: %.2f\n", params.flow_shift);
     printf(" strength(img2img): %.2f\n", params.strength);
     printf(" rng: %s\n", sd_rng_type_name(params.rng_type));
@@ -281,6 +285,7 @@ void print_usage(int argc, const char* argv[]) {
     printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
     printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
     printf(" -b, --batch-count COUNT number of images to generate\n");
+    printf(" --prediction {eps, v, edm_v, sd3_flow, flux_flow} Prediction type override.\n");
     printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
     printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
     printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
@@ -651,6 +656,20 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         return 1;
     };
 
+    auto on_prediction_arg = [&](int argc, const char** argv, int index) {
+        if (++index >= argc) {
+            return -1;
+        }
+        const char* arg   = argv[index];
+        params.prediction = str_to_prediction(arg);
+        if (params.prediction == PREDICTION_COUNT) {
+            fprintf(stderr, "error: invalid prediction type %s\n",
+                    arg);
+            return -1;
+        }
+        return 1;
+    };
+
     auto on_sample_method_arg = [&](int argc, const char** argv, int index) {
         if (++index >= argc) {
             return -1;
@@ -807,6 +826,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
         {"", "--rng", "", on_rng_arg},
         {"-s", "--seed", "", on_seed_arg},
         {"", "--sampling-method", "", on_sample_method_arg},
+        {"", "--prediction", "", on_prediction_arg},
         {"", "--scheduler", "", on_schedule_arg},
         {"", "--skip-layers", "", on_skip_layers_arg},
         {"", "--high-noise-sampling-method", "", on_high_noise_sample_method_arg},
@@ -1354,6 +1374,7 @@ int main(int argc, const char* argv[]) {
         params.n_threads,
         params.wtype,
         params.rng_type,
+        params.prediction,
         params.offload_params_to_cpu,
         params.clip_on_cpu,
         params.control_net_cpu,
@@ -700,64 +700,102 @@ public:
                      ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
         }
 
-        // check is_using_v_parameterization_for_sd2
-        if (sd_version_is_sd2(version)) {
-            if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
-                is_using_v_parameterization = true;
-            }
-        } else if (sd_version_is_sdxl(version)) {
-            if (model_loader.tensor_storages_types.find("edm_vpred.sigma_max") != model_loader.tensor_storages_types.end()) {
-                // CosXL models
-                // TODO: get sigma_min and sigma_max values from file
-                is_using_edm_v_parameterization = true;
-            }
-            if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) {
-                is_using_v_parameterization = true;
-            }
-        } else if (version == VERSION_SVD) {
-            // TODO: V_PREDICTION_EDM
-            is_using_v_parameterization = true;
-        }
-
-        if (sd_version_is_sd3(version)) {
-            LOG_INFO("running in FLOW mode");
-            float shift = sd_ctx_params->flow_shift;
-            if (shift == INFINITY) {
-                shift = 3.0;
-            }
-            denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
-        } else if (sd_version_is_flux(version)) {
-            LOG_INFO("running in Flux FLOW mode");
-            float shift = 1.0f; // TODO: validate
-            for (auto pair : model_loader.tensor_storages_types) {
-                if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
-                    shift = 1.15f;
-                    break;
-                }
-            }
-            denoiser = std::make_shared<FluxFlowDenoiser>(shift);
-        } else if (sd_version_is_wan(version)) {
-            LOG_INFO("running in FLOW mode");
-            float shift = sd_ctx_params->flow_shift;
-            if (shift == INFINITY) {
-                shift = 5.0;
-            }
-            denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
-        } else if (sd_version_is_qwen_image(version)) {
-            LOG_INFO("running in FLOW mode");
-            float shift = sd_ctx_params->flow_shift;
-            if (shift == INFINITY) {
-                shift = 3.0;
-            }
-            denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
-        } else if (is_using_v_parameterization) {
-            LOG_INFO("running in v-prediction mode");
-            denoiser = std::make_shared<CompVisVDenoiser>();
-        } else if (is_using_edm_v_parameterization) {
-            LOG_INFO("running in v-prediction EDM mode");
-            denoiser = std::make_shared<EDMVDenoiser>();
-        } else {
-            LOG_INFO("running in eps-prediction mode");
+        if (sd_ctx_params->prediction != DEFAULT_PRED) {
+            switch (sd_ctx_params->prediction) {
+                case EPS_PRED:
+                    LOG_INFO("running in eps-prediction mode");
+                    break;
+                case V_PRED:
+                    LOG_INFO("running in v-prediction mode");
+                    denoiser = std::make_shared<CompVisVDenoiser>();
+                    break;
+                case EDM_V_PRED:
+                    LOG_INFO("running in v-prediction EDM mode");
+                    denoiser = std::make_shared<EDMVDenoiser>();
+                    break;
+                case SD3_FLOW_PRED: {
+                    LOG_INFO("running in FLOW mode");
+                    float shift = sd_ctx_params->flow_shift;
+                    if (shift == INFINITY) {
+                        shift = 3.0;
+                    }
+                    denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
+                    break;
+                }
+                case FLUX_FLOW_PRED: {
+                    LOG_INFO("running in Flux FLOW mode");
+                    float shift = sd_ctx_params->flow_shift;
+                    if (shift == INFINITY) {
+                        shift = 3.0;
+                    }
+                    denoiser = std::make_shared<FluxFlowDenoiser>(shift);
+                    break;
+                }
+                default: {
+                    LOG_ERROR("Unknown parametrization %i", sd_ctx_params->prediction);
+                    return false;
+                }
+            }
+        } else {
+            if (sd_version_is_sd2(version)) {
+                // check is_using_v_parameterization_for_sd2
+                if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
+                    is_using_v_parameterization = true;
+                }
+            } else if (sd_version_is_sdxl(version)) {
+                if (model_loader.tensor_storages_types.find("edm_vpred.sigma_max") != model_loader.tensor_storages_types.end()) {
+                    // CosXL models
+                    // TODO: get sigma_min and sigma_max values from file
+                    is_using_edm_v_parameterization = true;
+                }
+                if (model_loader.tensor_storages_types.find("v_pred") != model_loader.tensor_storages_types.end()) {
+                    is_using_v_parameterization = true;
+                }
+            } else if (version == VERSION_SVD) {
+                // TODO: V_PREDICTION_EDM
+                is_using_v_parameterization = true;
+            }
+
+            if (sd_version_is_sd3(version)) {
+                LOG_INFO("running in FLOW mode");
+                float shift = sd_ctx_params->flow_shift;
+                if (shift == INFINITY) {
+                    shift = 3.0;
+                }
+                denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
+            } else if (sd_version_is_flux(version)) {
+                LOG_INFO("running in Flux FLOW mode");
+                float shift = 1.0f; // TODO: validate
+                for (auto pair : model_loader.tensor_storages_types) {
+                    if (pair.first.find("model.diffusion_model.guidance_in.in_layer.weight") != std::string::npos) {
+                        shift = 1.15f;
+                        break;
+                    }
+                }
+                denoiser = std::make_shared<FluxFlowDenoiser>(shift);
+            } else if (sd_version_is_wan(version)) {
+                LOG_INFO("running in FLOW mode");
+                float shift = sd_ctx_params->flow_shift;
+                if (shift == INFINITY) {
+                    shift = 5.0;
+                }
+                denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
+            } else if (sd_version_is_qwen_image(version)) {
+                LOG_INFO("running in FLOW mode");
+                float shift = sd_ctx_params->flow_shift;
+                if (shift == INFINITY) {
+                    shift = 3.0;
+                }
+                denoiser = std::make_shared<DiscreteFlowDenoiser>(shift);
+            } else if (is_using_v_parameterization) {
+                LOG_INFO("running in v-prediction mode");
+                denoiser = std::make_shared<CompVisVDenoiser>();
+            } else if (is_using_edm_v_parameterization) {
+                LOG_INFO("running in v-prediction EDM mode");
+                denoiser = std::make_shared<EDMVDenoiser>();
+            } else {
+                LOG_INFO("running in eps-prediction mode");
+            }
         }
 
         auto comp_vis_denoiser = std::dynamic_pointer_cast<CompVisDenoiser>(denoiser);
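The same override is available to any consumer of the C API, since the new field travels through sd_ctx_params_t into this init path. A minimal sketch of forcing v-prediction from client code, assuming the usual new_sd_ctx()/free_sd_ctx() entry points and a model_path field in sd_ctx_params_t (the model path below is a placeholder):

    #include "stable-diffusion.h"

    int main() {
        sd_ctx_params_t params;
        sd_ctx_params_init(&params);                 // defaults, including prediction = DEFAULT_PRED
        params.model_path = "sd2-1-768.safetensors"; // placeholder path
        params.prediction = V_PRED;                  // force v-prediction instead of auto-detection

        sd_ctx_t* ctx = new_sd_ctx(&params);         // assumed entry point from the public header
        if (ctx == NULL) {
            return 1;
        }
        // ... run txt2img/img2img as usual ...
        free_sd_ctx(ctx);
        return 0;
    }

Leaving prediction at DEFAULT_PRED (as sd_ctx_params_init does) keeps the auto-detection branch shown above.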
@@ -1742,6 +1780,31 @@ enum scheduler_t str_to_schedule(const char* str) {
     return SCHEDULE_COUNT;
 }
 
+const char* prediction_to_str[] = {
+    "default",
+    "eps",
+    "v",
+    "edm_v",
+    "sd3_flow",
+    "flux_flow",
+};
+
+const char* sd_prediction_name(enum prediction_t prediction) {
+    if (prediction < PREDICTION_COUNT) {
+        return prediction_to_str[prediction];
+    }
+    return NONE_STR;
+}
+
+enum prediction_t str_to_prediction(const char* str) {
+    for (int i = 0; i < PREDICTION_COUNT; i++) {
+        if (!strcmp(str, prediction_to_str[i])) {
+            return (enum prediction_t)i;
+        }
+    }
+    return PREDICTION_COUNT;
+}
+
 void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
     *sd_ctx_params = {};
     sd_ctx_params->vae_decode_only = true;
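Because prediction_to_str[] is indexed by the enum values declared in the public header, the two helpers form a simple round trip, with PREDICTION_COUNT doubling as the "unrecognized" sentinel that the CLI turns into a parse error. A standalone illustration, for demonstration only:

    #include <cassert>
    #include <cstring>
    #include "stable-diffusion.h"

    int main() {
        // canonical name of an enum value
        assert(strcmp(sd_prediction_name(V_PRED), "v") == 0);

        // parsing a user-supplied string back to the enum
        assert(str_to_prediction("sd3_flow") == SD3_FLOW_PRED);

        // unknown strings map to the PREDICTION_COUNT sentinel
        assert(str_to_prediction("not-a-mode") == PREDICTION_COUNT);
        return 0;
    }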
@@ -1749,6 +1812,7 @@ void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) {
     sd_ctx_params->n_threads = get_num_physical_cores();
     sd_ctx_params->wtype = SD_TYPE_COUNT;
     sd_ctx_params->rng_type = CUDA_RNG;
+    sd_ctx_params->prediction = DEFAULT_PRED;
     sd_ctx_params->offload_params_to_cpu = false;
     sd_ctx_params->keep_clip_on_cpu = false;
     sd_ctx_params->keep_control_net_on_cpu = false;
@@ -1788,6 +1852,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
         "n_threads: %d\n"
         "wtype: %s\n"
         "rng_type: %s\n"
+        "prediction: %s\n"
         "offload_params_to_cpu: %s\n"
         "keep_clip_on_cpu: %s\n"
         "keep_control_net_on_cpu: %s\n"
@@ -1816,6 +1881,7 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) {
         sd_ctx_params->n_threads,
         sd_type_name(sd_ctx_params->wtype),
         sd_rng_type_name(sd_ctx_params->rng_type),
+        sd_prediction_name(sd_ctx_params->prediction),
         BOOL_STR(sd_ctx_params->offload_params_to_cpu),
         BOOL_STR(sd_ctx_params->keep_clip_on_cpu),
         BOOL_STR(sd_ctx_params->keep_control_net_on_cpu),
@@ -64,6 +64,16 @@ enum scheduler_t {
     SCHEDULE_COUNT
 };
 
+enum prediction_t {
+    DEFAULT_PRED,
+    EPS_PRED,
+    V_PRED,
+    EDM_V_PRED,
+    SD3_FLOW_PRED,
+    FLUX_FLOW_PRED,
+    PREDICTION_COUNT
+};
+
 // same as enum ggml_type
 enum sd_type_t {
     SD_TYPE_F32 = 0,
@@ -146,6 +156,7 @@ typedef struct {
     int n_threads;
     enum sd_type_t wtype;
     enum rng_type_t rng_type;
+    enum prediction_t prediction;
    bool offload_params_to_cpu;
     bool keep_clip_on_cpu;
     bool keep_control_net_on_cpu;
@@ -255,6 +266,8 @@ SD_API const char* sd_sample_method_name(enum sample_method_t sample_method);
 SD_API enum sample_method_t str_to_sample_method(const char* str);
 SD_API const char* sd_schedule_name(enum scheduler_t scheduler);
 SD_API enum scheduler_t str_to_schedule(const char* str);
+SD_API const char* sd_prediction_name(enum prediction_t prediction);
+SD_API enum prediction_t str_to_prediction(const char* str);
 
 SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
 SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|||||||
Loading…
x
Reference in New Issue
Block a user