refactor: update c api (#728)

leejet authored 2025-07-13 18:48:42 +08:00, committed by GitHub
parent a772dca27a
commit ca0bd9396e
7 changed files with 1200 additions and 1318 deletions


@@ -282,14 +282,14 @@ usage: ./bin/sd [arguments]
arguments:
-h, --help show this help message and exit
-M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)
-M, --mode [MODE] run mode, one of: [img_gen, convert], default: img_gen
-t, --threads N number of threads to use during computation (default: -1)
If threads <= 0, then threads will be set to the number of CPU physical cores
-m, --model [MODEL] path to full model
--diffusion-model path to the standalone diffusion model
--clip_l path to the clip-l text encoder
--clip_g path to the clip-g text encoder
--t5xxl path to the the t5xxl text encoder
--t5xxl path to the t5xxl text encoder
--vae [VAE] path to vae
--taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
--control-net [CONTROL_PATH] path to control net model
@@ -301,16 +301,18 @@ arguments:
--upscale-repeats Run the ESRGAN upscaler this many times (default 1)
--type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)
If not specified, the default is the type of the weight file
--tensor-type-rules [EXPRESSION] weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
--lora-model-dir [DIR] lora model directory
-i, --init-img [IMAGE] path to the input image, required by img2img
--mask [MASK] path to the mask image, required by img2img with mask
--control-image [IMAGE] path to image condition, control net
-r, --ref_image [PATH] reference image for Flux Kontext models (can be used multiple times)
-r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times)
-o, --output OUTPUT path to write result image to (default: ./output.png)
-p, --prompt [PROMPT] the prompt to render
-n, --negative-prompt PROMPT the negative prompt (default: "")
--cfg-scale SCALE unconditional guidance scale: (default: 7.0)
--guidance SCALE guidance scale for img2img (default: 3.5)
--img-cfg-scale SCALE image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
--guidance SCALE distilled guidance scale for models with guidance input (default: 3.5)
--slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)
0 means disabled, a value of 2.5 is nice for sd3.5 medium
--eta SCALE eta in DDIM, only for DDIM and TCD: (default: 0)
@@ -319,7 +321,7 @@ arguments:
--skip-layer-end END SLG disabling point: (default: 0.2)
SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])
--strength STRENGTH strength for noising/unnoising (default: 0.75)
--style-ratio STYLE-RATIO strength for keeping input identity (default: 20%)
--style-ratio STYLE-RATIO strength for keeping input identity (default: 20)
--control-strength STRENGTH strength to apply Control Net (default: 0.9)
1.0 corresponds to full destruction of information in init image
-H, --height H image height, in pixel space (default: 512)
@@ -371,7 +373,7 @@ Using formats of different precisions will yield results of varying quality.
```
./bin/sd --mode img2img -m ../models/sd-v1-4.ckpt -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
./bin/sd -m ../models/sd-v1-4.ckpt -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
```
<p align="center">


@@ -27,7 +27,7 @@ You can download the preconverted gguf weights from [FLUX.1-Kontext-dev-GGUF](ht
For example:
```
.\bin\Release\sd.exe -M edit -r .\flux1-dev-q8_0.png --diffusion-model ..\models\flux1-kontext-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "change 'flux.cpp' to 'kontext.cpp'" --cfg-scale 1.0 --sampling-method euler -v
.\bin\Release\sd.exe -r .\flux1-dev-q8_0.png --diffusion-model ..\models\flux1-kontext-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "change 'flux.cpp' to 'kontext.cpp'" --cfg-scale 1.0 --sampling-method euler -v
```

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -30,7 +30,8 @@ extern "C" {
enum rng_type_t {
STD_DEFAULT_RNG,
CUDA_RNG
CUDA_RNG,
RNG_TYPE_COUNT
};
enum sample_method_t {
@@ -46,7 +47,7 @@ enum sample_method_t {
LCM,
DDIM_TRAILING,
TCD,
N_SAMPLE_METHODS
SAMPLE_METHOD_COUNT
};
enum schedule_t {
@@ -56,7 +57,7 @@ enum schedule_t {
EXPONENTIAL,
AYS,
GITS,
N_SCHEDULES
SCHEDULE_COUNT
};
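The renamed `*_COUNT` sentinels (replacing the old `N_*` spellings) pair with the name/str helper functions declared further down in this header. A minimal sketch of listing the available samplers, assuming the header installs as `stable-diffusion.h`:

```
#include <stdio.h>
#include "stable-diffusion.h"

int main(void) {
    // SAMPLE_METHOD_COUNT keeps the loop in sync with the enum,
    // with no hardcoded upper bound.
    for (int i = 0; i < SAMPLE_METHOD_COUNT; i++) {
        printf("%d: %s\n", i, sd_sample_method_name((enum sample_method_t)i));
    }
    return 0;
}
```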
// same as enum ggml_type
@@ -103,8 +104,6 @@ enum sd_type_t {
SD_TYPE_COUNT = 39,
};
SD_API const char* sd_type_name(enum sd_type_t type);
enum sd_log_level_t {
SD_LOG_DEBUG,
SD_LOG_INFO,
@@ -112,13 +111,33 @@ enum sd_log_level_t {
SD_LOG_ERROR
};
typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
SD_API int32_t get_num_physical_cores();
SD_API const char* sd_get_system_info();
typedef struct {
const char* model_path;
const char* clip_l_path;
const char* clip_g_path;
const char* t5xxl_path;
const char* diffusion_model_path;
const char* vae_path;
const char* taesd_path;
const char* control_net_path;
const char* lora_model_dir;
const char* embedding_dir;
const char* stacked_id_embed_dir;
bool vae_decode_only;
bool vae_tiling;
bool free_params_immediately;
int n_threads;
enum sd_type_t wtype;
enum rng_type_t rng_type;
enum schedule_t schedule;
bool keep_clip_on_cpu;
bool keep_control_net_on_cpu;
bool keep_vae_on_cpu;
bool diffusion_flash_attn;
bool chroma_use_dit_mask;
bool chroma_use_t5_mask;
int chroma_t5_mask_pad;
} sd_ctx_params_t;
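This struct replaces the long positional argument list of new_sd_ctx. A hedged sketch of the intended call pattern, using the sd_ctx_params_init / new_sd_ctx / free_sd_ctx declarations further down in this diff (the model path is a placeholder):

```
#include <stdio.h>
#include "stable-diffusion.h"

int main(void) {
    sd_ctx_params_t params;
    sd_ctx_params_init(&params);          // fill every field with defaults first
    params.model_path = "sd-v1-4.ckpt";   // placeholder path
    params.n_threads  = -1;               // <= 0: use physical core count
    sd_ctx_t* ctx = new_sd_ctx(&params);
    if (!ctx) {                           // NULL on failure is an assumption
        fprintf(stderr, "failed to create context\n");
        return 1;
    }
    free_sd_ctx(ctx);
    return 0;
}
```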
typedef struct {
uint32_t width;
@@ -127,8 +146,6 @@ typedef struct {
uint8_t* data;
} sd_image_t;
typedef struct sd_ctx_t sd_ctx_t;
typedef struct {
int* layers;
size_t layer_count;
@@ -145,106 +162,76 @@ typedef struct {
sd_slg_params_t slg;
} sd_guidance_params_t;
SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
const char* clip_l_path,
const char* clip_g_path,
const char* t5xxl_path,
const char* diffusion_model_path,
const char* vae_path,
const char* taesd_path,
const char* control_net_path_c_str,
const char* lora_model_dir,
const char* embed_dir_c_str,
const char* stacked_id_embed_dir_c_str,
bool vae_decode_only,
bool vae_tiling,
bool free_params_immediately,
int n_threads,
enum sd_type_t wtype,
enum rng_type_t rng_type,
enum schedule_t s,
bool keep_clip_on_cpu,
bool keep_control_net_cpu,
bool keep_vae_on_cpu,
bool diffusion_flash_attn,
bool chroma_use_dit_mask,
bool chroma_use_t5_mask,
int chroma_t5_mask_pad);
typedef struct {
const char* prompt;
const char* negative_prompt;
int clip_skip;
sd_guidance_params_t guidance;
sd_image_t init_image;
sd_image_t* ref_images;
int ref_images_count;
sd_image_t mask_image;
int width;
int height;
enum sample_method_t sample_method;
int sample_steps;
float eta;
float strength;
int64_t seed;
int batch_count;
const sd_image_t* control_cond;
float control_strength;
float style_strength;
bool normalize_input;
const char* input_id_images_path;
} sd_img_gen_params_t;
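The old txt2img, img2img, and edit entry points collapse into this one parameter struct plus generate_image (declared below). A sketch of a plain txt2img call under that assumption:

```
#include "stable-diffusion.h"

// Sketch: txt2img through the consolidated generate_image entry point.
// sd_ctx is assumed to come from new_sd_ctx as above.
sd_image_t* generate(sd_ctx_t* sd_ctx) {
    sd_img_gen_params_t p;
    sd_img_gen_params_init(&p);   // defaults for every field
    p.prompt       = "a lovely cat";
    p.width        = 512;
    p.height       = 512;
    p.sample_steps = 20;
    p.seed         = 42;
    p.batch_count  = 1;
    // Returns batch_count images; caller-owned result is an assumption.
    return generate_image(sd_ctx, &p);
}
```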
typedef struct {
sd_image_t init_image;
int width;
int height;
sd_guidance_params_t guidance;
enum sample_method_t sample_method;
int sample_steps;
float strength;
int64_t seed;
int video_frames;
int motion_bucket_id;
int fps;
float augmentation_level;
} sd_vid_gen_params_t;
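The video path follows the same init-then-call pattern, though the header below marks generate_video as broken; this is illustrative only and the field values are assumptions, not documented defaults:

```
#include "stable-diffusion.h"

// Illustrative only: generate_video is marked "// broken" below.
sd_image_t* make_video(sd_ctx_t* sd_ctx, sd_image_t init) {
    sd_vid_gen_params_t p;
    sd_vid_gen_params_init(&p);
    p.init_image   = init;
    p.width        = 512;
    p.height       = 512;
    p.video_frames = 14;   // frame count chosen for illustration
    return generate_video(sd_ctx, &p);
}
```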
typedef struct sd_ctx_t sd_ctx_t;
typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
SD_API int32_t get_num_physical_cores();
SD_API const char* sd_get_system_info();
SD_API const char* sd_type_name(enum sd_type_t type);
SD_API enum sd_type_t str_to_sd_type(const char* str);
SD_API const char* sd_rng_type_name(enum rng_type_t rng_type);
SD_API enum rng_type_t str_to_rng_type(const char* str);
SD_API const char* sd_sample_method_name(enum sample_method_t sample_method);
SD_API enum sample_method_t str_to_sample_method(const char* str);
SD_API const char* sd_schedule_name(enum schedule_t schedule);
SD_API enum schedule_t str_to_schedule(const char* str);
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
SD_API sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params);
SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
const char* prompt,
const char* negative_prompt,
int clip_skip,
sd_guidance_params_t guidance,
float eta,
int width,
int height,
enum sample_method_t sample_method,
int sample_steps,
int64_t seed,
int batch_count,
const sd_image_t* control_cond,
float control_strength,
float style_strength,
bool normalize_input,
const char* input_id_images_path);
SD_API void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params);
SD_API char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params);
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params);
SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx,
sd_image_t init_image,
sd_image_t mask_image,
const char* prompt,
const char* negative_prompt,
int clip_skip,
sd_guidance_params_t guidance,
float eta,
int width,
int height,
enum sample_method_t sample_method,
int sample_steps,
float strength,
int64_t seed,
int batch_count,
const sd_image_t* control_cond,
float control_strength,
float style_strength,
bool normalize_input,
const char* input_id_images_path);
SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
sd_image_t init_image,
int width,
int height,
int video_frames,
int motion_bucket_id,
int fps,
float augmentation_level,
sd_guidance_params_t guidance,
enum sample_method_t sample_method,
int sample_steps,
float strength,
int64_t seed);
SD_API sd_image_t* edit(sd_ctx_t* sd_ctx,
sd_image_t* ref_images,
int ref_images_count,
const char* prompt,
const char* negative_prompt,
int clip_skip,
sd_guidance_params_t guidance,
float eta,
int width,
int height,
enum sample_method_t sample_method,
int sample_steps,
int64_t seed,
int batch_count,
const sd_image_t* control_cond,
float control_strength,
float style_strength,
bool normalize_input,
const char* input_id_images_path);
SD_API void sd_vid_gen_params_init(sd_vid_gen_params_t* sd_vid_gen_params);
SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* sd_vid_gen_params); // broken
typedef struct upscaler_ctx_t upscaler_ctx_t;
@@ -254,7 +241,11 @@ SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type, const char* tensor_type_rules);
SD_API bool convert(const char* input_path,
const char* vae_path,
const char* output_path,
enum sd_type_t output_type,
const char* tensor_type_rules);
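convert now takes a tensor_type_rules string alongside the output type, matching the new --tensor-type-rules CLI flag shown above. A sketch with placeholder paths, assuming the q8_0 member of sd_type_t and that a NULL vae_path means no external VAE is merged in:

```
#include <stdbool.h>
#include "stable-diffusion.h"

int main(void) {
    // Paths are placeholders; the rules string follows the
    // --tensor-type-rules syntax from the CLI help above.
    bool ok = convert("sd-v1-4.ckpt", /*vae_path*/ NULL,
                      "sd-v1-4-q8_0.gguf", SD_TYPE_Q8_0, "^vae\\.=f16");
    return ok ? 0 : 1;
}
```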
SD_API uint8_t* preprocess_canny(uint8_t* img,
int width,


@@ -441,10 +441,6 @@ const char* sd_get_system_info() {
return buffer;
}
const char* sd_type_name(enum sd_type_t type) {
return ggml_type_name((ggml_type)type);
}
sd_image_f32_t sd_image_t_to_sd_image_f32_t(sd_image_t image) {
sd_image_f32_t converted_image;
converted_image.width = image.width;

util.h

@@ -7,6 +7,9 @@
#include "stable-diffusion.h"
#define SAFE_STR(s) ((s) ? (s) : "")
#define BOOL_STR(b) ((b) ? "true" : "false")
bool ends_with(const std::string& str, const std::string& ending);
bool starts_with(const std::string& str, const std::string& start);
bool contains(const std::string& str, const std::string& substr);