Compare commits

...

4 Commits

10 changed files with 327 additions and 112 deletions

View File

@ -87,6 +87,38 @@ file(GLOB SD_LIB_SOURCES
"*.hpp" "*.hpp"
) )
# Derive a human-readable version and a short commit hash from git at configure
# time and hand them to version.cpp as preprocessor definitions.
# NO_CMAKE_FIND_ROOT_PATH: presumably so the host's git is still found when a
# cross-compiling toolchain restricts the find root -- TODO confirm.
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
if(GIT_EXE)
# Version: `git describe` on tags, 7-digit abbreviated hash, "+" suffix when the
# working tree is dirty. ERROR_QUIET hides git's stderr; on failure the output
# variable stays empty and the fallback below applies.
execute_process(COMMAND ${GIT_EXE} describe --tags --abbrev=7 --dirty=+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE SDCPP_BUILD_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
# Commit: abbreviated hash of HEAD.
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE SDCPP_BUILD_COMMIT
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
endif()
# Fall back to the literal `unknown` when git is missing or the query produced
# no output.
if(NOT SDCPP_BUILD_VERSION)
set(SDCPP_BUILD_VERSION unknown)
endif()
message(STATUS "stable-diffusion.cpp version ${SDCPP_BUILD_VERSION}")
if(NOT SDCPP_BUILD_COMMIT)
set(SDCPP_BUILD_COMMIT unknown)
endif()
message(STATUS "stable-diffusion.cpp commit ${SDCPP_BUILD_COMMIT}")
# The definitions are attached to version.cpp only (a SOURCE property), not to
# the whole target. The values are passed as bare tokens; version.cpp turns them
# into string literals.
set_property(
SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp
APPEND PROPERTY COMPILE_DEFINITIONS
SDCPP_BUILD_COMMIT=${SDCPP_BUILD_COMMIT} SDCPP_BUILD_VERSION=${SDCPP_BUILD_VERSION}
)
if(SD_BUILD_SHARED_LIBS) if(SD_BUILD_SHARED_LIBS)
message("-- Build shared library") message("-- Build shared library")
message(${SD_LIB_SOURCES}) message(${SD_LIB_SOURCES})

View File

@ -105,7 +105,7 @@ API and command-line option may change frequently.***
### Download model weights ### Download model weights
- download weights(.ckpt or .safetensors or .gguf). For example - download weights(.ckpt or .safetensors or .gguf). For example
- Stable Diffusion v1.5 from https://huggingface.co/runwayml/stable-diffusion-v1-5 - Stable Diffusion v1.5 from https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5
```sh ```sh
curl -L -O https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors curl -L -O https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors

View File

@ -7,31 +7,6 @@
/*================================================== CLIPTokenizer ===================================================*/ /*================================================== CLIPTokenizer ===================================================*/
/**
 * Strips every `<lora:NAME:MULTIPLIER>` tag from `text` and collects the
 * requested multipliers per NAME.
 *
 * - Tags naming the same NAME have their multipliers summed.
 * - Tags whose multiplier is exactly 0 are removed but not recorded.
 * - Tags whose multiplier cannot be parsed as a float are removed but not
 *   recorded (previously std::stof's exception escaped from this function).
 *
 * @param text prompt text, taken by value because it is edited in place
 * @return {NAME -> summed multiplier, text with all tags removed}
 */
__STATIC_INLINE__ std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remove_lora(std::string text) {
    // Compiled once; building a std::regex on every call is needlessly costly.
    static const std::regex re("<lora:([^:]+):([^>]+)>");

    std::unordered_map<std::string, float> filename2multiplier;
    std::smatch matches;
    while (std::regex_search(text, matches, re)) {
        // Copy the captures out before `text` is rewritten: `matches` refers
        // into the current contents of `text`.
        const std::string filename       = matches[1].str();
        const std::string raw_multiplier = matches[2].str();

        // Always drop the tag, even if it turns out to be unusable, so the
        // loop makes progress and the tag never reaches the text encoder.
        text = std::regex_replace(text, re, "", std::regex_constants::format_first_only);

        float multiplier = 0.f;
        try {
            multiplier = std::stof(raw_multiplier);
        } catch (...) {
            // std::stof throws on non-numeric or out-of-range input; treat the
            // tag as malformed and skip it instead of aborting the caller.
            continue;
        }
        if (multiplier == 0.f) {
            continue;
        }

        // operator[] value-initialises a missing entry to 0, so this covers
        // both the first occurrence and accumulation.
        filename2multiplier[filename] += multiplier;
    }
    return std::make_pair(std::move(filename2multiplier), std::move(text));
}
__STATIC_INLINE__ std::vector<std::pair<int, std::u32string>> bytes_to_unicode() { __STATIC_INLINE__ std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
std::vector<std::pair<int, std::u32string>> byte_unicode_pairs; std::vector<std::pair<int, std::u32string>> byte_unicode_pairs;
std::set<int> byte_set; std::set<int> byte_set;

View File

@ -324,6 +324,7 @@ struct SDCliParams {
std::string output_path = "output.png"; std::string output_path = "output.png";
bool verbose = false; bool verbose = false;
bool version = false;
bool canny_preprocess = false; bool canny_preprocess = false;
preview_t preview_method = PREVIEW_NONE; preview_t preview_method = PREVIEW_NONE;
@ -366,6 +367,10 @@ struct SDCliParams {
"--verbose", "--verbose",
"print extra info", "print extra info",
true, &verbose}, true, &verbose},
{"",
"--version",
"print stable-diffusion.cpp version",
true, &version},
{"", {"",
"--color", "--color",
"colors the logging tags according to level", "colors the logging tags according to level",
@ -502,7 +507,7 @@ struct SDContextParams {
std::string lora_model_dir; std::string lora_model_dir;
std::map<std::string, std::string> embedding_map; std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_array; std::vector<sd_embedding_t> embedding_vec;
rng_type_t rng_type = CUDA_RNG; rng_type_t rng_type = CUDA_RNG;
rng_type_t sampler_rng_type = RNG_TYPE_COUNT; rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
@ -947,13 +952,13 @@ struct SDContextParams {
} }
sd_ctx_params_t to_sd_ctx_params_t(bool vae_decode_only, bool free_params_immediately, bool taesd_preview) { sd_ctx_params_t to_sd_ctx_params_t(bool vae_decode_only, bool free_params_immediately, bool taesd_preview) {
embedding_array.clear(); embedding_vec.clear();
embedding_array.reserve(embedding_map.size()); embedding_vec.reserve(embedding_map.size());
for (const auto& kv : embedding_map) { for (const auto& kv : embedding_map) {
sd_embedding_t item; sd_embedding_t item;
item.name = kv.first.c_str(); item.name = kv.first.c_str();
item.path = kv.second.c_str(); item.path = kv.second.c_str();
embedding_array.emplace_back(item); embedding_vec.emplace_back(item);
} }
sd_ctx_params_t sd_ctx_params = { sd_ctx_params_t sd_ctx_params = {
@ -970,8 +975,8 @@ struct SDContextParams {
taesd_path.c_str(), taesd_path.c_str(),
control_net_path.c_str(), control_net_path.c_str(),
lora_model_dir.c_str(), lora_model_dir.c_str(),
embedding_array.data(), embedding_vec.data(),
static_cast<uint32_t>(embedding_array.size()), static_cast<uint32_t>(embedding_vec.size()),
photo_maker_path.c_str(), photo_maker_path.c_str(),
tensor_type_rules.c_str(), tensor_type_rules.c_str(),
vae_decode_only, vae_decode_only,
@ -1025,6 +1030,15 @@ static std::string vec_str_to_string(const std::vector<std::string>& v) {
return oss.str(); return oss.str();
} }
// Tells whether `p` already names a fixed location, as opposed to something
// that has to be joined onto a base directory.
static bool is_absolute_path(const std::string& p) {
#ifdef _WIN32
    // Windows: a drive letter followed by ':' (C:/path or C:\path)
    if (p.size() < 2) {
        return false;
    }
    const bool starts_with_letter = std::isalpha(static_cast<unsigned char>(p[0])) != 0;
    return starts_with_letter && p[1] == ':';
#else
    // Everywhere else: a leading '/'
    if (p.empty()) {
        return false;
    }
    return p.front() == '/';
#endif
}
struct SDGenerationParams { struct SDGenerationParams {
std::string prompt; std::string prompt;
std::string negative_prompt; std::string negative_prompt;
@ -1067,6 +1081,10 @@ struct SDGenerationParams {
int upscale_repeats = 1; int upscale_repeats = 1;
std::map<std::string, float> lora_map;
std::map<std::string, float> high_noise_lora_map;
std::vector<sd_lora_t> lora_vec;
SDGenerationParams() { SDGenerationParams() {
sd_sample_params_init(&sample_params); sd_sample_params_init(&sample_params);
sd_sample_params_init(&high_noise_sample_params); sd_sample_params_init(&high_noise_sample_params);
@ -1437,7 +1455,88 @@ struct SDGenerationParams {
return options; return options;
} }
// Pulls every `<lora:NAME:MULTIPLIER>` tag out of `prompt`, resolves NAME to a
// file path, and records the multiplier in `lora_map` (or in
// `high_noise_lora_map` when NAME starts with "|high_noise|"). Afterwards one
// sd_lora_t per map entry is appended to `lora_vec`.
//
// lora_model_dir: base directory that non-absolute NAMEs are resolved against.
//
// Tags with an unparsable multiplier or an unresolvable file are removed from
// the prompt without being recorded.
bool process_and_check(SDMode mode) { void extract_and_remove_lora(const std::string& lora_model_dir) {
static const std::regex re(R"(<lora:([^:>]+):([^>]+)>)");
// Extensions tried, in this order, when NAME does not exist as written.
static const std::vector<std::string> valid_ext = {".pt", ".safetensors", ".gguf"};
std::smatch m;
// `tmp` is the not-yet-scanned remainder of the prompt. Each iteration also
// deletes the first remaining tag from `prompt`, so the match found in `tmp`
// always corresponds to the first tag still present in `prompt`.
std::string tmp = prompt;
while (std::regex_search(tmp, m, re)) {
std::string raw_path = m[1].str();
const std::string raw_mul = m[2].str();
float mul = 0.f;
try {
mul = std::stof(raw_mul);
} catch (...) {
// Multiplier is not a number std::stof accepts: drop the tag and move on.
tmp = m.suffix().str();
prompt = std::regex_replace(prompt, re, "", std::regex_constants::format_first_only);
continue;
}
bool is_high_noise = false;
static const std::string prefix = "|high_noise|";
// rfind(prefix, 0) == 0 is a "starts with" test.
if (raw_path.rfind(prefix, 0) == 0) {
raw_path.erase(0, prefix.size());
is_high_noise = true;
}
fs::path final_path;
if (is_absolute_path(raw_path)) {
final_path = raw_path;
} else {
final_path = fs::path(lora_model_dir) / raw_path;
}
if (!fs::exists(final_path)) {
// NAME may have been given without an extension; try each known one.
bool found = false;
for (const auto& ext : valid_ext) {
fs::path try_path = final_path;
try_path += ext;
if (fs::exists(try_path)) {
final_path = try_path;
found = true;
break;
}
}
if (!found) {
printf("can not found lora %s\n", final_path.lexically_normal().string().c_str());
tmp = m.suffix().str();
prompt = std::regex_replace(prompt, re, "", std::regex_constants::format_first_only);
continue;
}
}
// The lexically normalised path is the map key, so tags that resolve to the
// same normalised path share one entry. operator[] starts a new entry at 0,
// so `+=` sums the multipliers of repeated tags.
const std::string key = final_path.lexically_normal().string();
if (is_high_noise)
high_noise_lora_map[key] += mul;
else
lora_map[key] += mul;
prompt = std::regex_replace(prompt, re, "", std::regex_constants::format_first_only);
tmp = m.suffix().str();
}
// Flatten both maps into `lora_vec`. `item.path` points at the map key's own
// storage, so the maps must outlive any use of `lora_vec`.
// NOTE(review): `lora_vec` is not cleared here, so a second call would append
// duplicate entries -- confirm this is only ever called once per instance.
for (const auto& kv : lora_map) {
sd_lora_t item;
item.is_high_noise = false;
item.path = kv.first.c_str();
item.multiplier = kv.second;
lora_vec.emplace_back(item);
}
for (const auto& kv : high_noise_lora_map) {
sd_lora_t item;
item.is_high_noise = true;
item.path = kv.first.c_str();
item.multiplier = kv.second;
lora_vec.emplace_back(item);
}
}
bool process_and_check(SDMode mode, const std::string& lora_model_dir) {
if (width <= 0) { if (width <= 0) {
fprintf(stderr, "error: the width must be greater than 0\n"); fprintf(stderr, "error: the width must be greater than 0\n");
return false; return false;
@ -1548,14 +1647,44 @@ struct SDGenerationParams {
seed = rand(); seed = rand();
} }
extract_and_remove_lora(lora_model_dir);
return true; return true;
} }
std::string to_string() const { std::string to_string() const {
char* sample_params_str = sd_sample_params_to_str(&sample_params); char* sample_params_str = sd_sample_params_to_str(&sample_params);
char* high_noise_sample_params_str = sd_sample_params_to_str(&high_noise_sample_params); char* high_noise_sample_params_str = sd_sample_params_to_str(&high_noise_sample_params);
std::ostringstream lora_ss;
lora_ss << "{\n";
for (auto it = lora_map.begin(); it != lora_map.end(); ++it) {
lora_ss << " \"" << it->first << "\": \"" << it->second << "\"";
if (std::next(it) != lora_map.end()) {
lora_ss << ",";
}
lora_ss << "\n";
}
lora_ss << " }";
std::string loras_str = lora_ss.str();
lora_ss = std::ostringstream();
;
lora_ss << "{\n";
for (auto it = high_noise_lora_map.begin(); it != high_noise_lora_map.end(); ++it) {
lora_ss << " \"" << it->first << "\": \"" << it->second << "\"";
if (std::next(it) != high_noise_lora_map.end()) {
lora_ss << ",";
}
lora_ss << "\n";
}
lora_ss << " }";
std::string high_noise_loras_str = lora_ss.str();
std::ostringstream oss; std::ostringstream oss;
oss << "SDGenerationParams {\n" oss << "SDGenerationParams {\n"
<< " loras: \"" << loras_str << "\",\n"
<< " high_noise_loras: \"" << high_noise_loras_str << "\",\n"
<< " prompt: \"" << prompt << "\",\n" << " prompt: \"" << prompt << "\",\n"
<< " negative_prompt: \"" << negative_prompt << "\",\n" << " negative_prompt: \"" << negative_prompt << "\",\n"
<< " clip_skip: " << clip_skip << ",\n" << " clip_skip: " << clip_skip << ",\n"
@ -1598,7 +1727,12 @@ struct SDGenerationParams {
} }
}; };
// Builds the one-line banner "stable-diffusion.cpp version <V>, commit <C>"
// from the library's sd_version() and sd_commit().
static std::string version_string() {
    std::string banner("stable-diffusion.cpp version ");
    banner += sd_version();
    banner += ", commit ";
    banner += sd_commit();
    return banner;
}
void print_usage(int argc, const char* argv[], const std::vector<ArgOptions>& options_list) { void print_usage(int argc, const char* argv[], const std::vector<ArgOptions>& options_list) {
std::cout << version_string() << "\n";
std::cout << "Usage: " << argv[0] << " [options]\n\n"; std::cout << "Usage: " << argv[0] << " [options]\n\n";
std::cout << "CLI Options:\n"; std::cout << "CLI Options:\n";
options_list[0].print(); options_list[0].print();
@ -1616,7 +1750,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
exit(cli_params.normal_exit ? 0 : 1); exit(cli_params.normal_exit ? 0 : 1);
} }
if (!cli_params.process_and_check() || !ctx_params.process_and_check(cli_params.mode) || !gen_params.process_and_check(cli_params.mode)) { if (!cli_params.process_and_check() ||
!ctx_params.process_and_check(cli_params.mode) ||
!gen_params.process_and_check(cli_params.mode, ctx_params.lora_model_dir)) {
print_usage(argc, argv, options_vec); print_usage(argc, argv, options_vec);
exit(1); exit(1);
} }
@ -1881,11 +2017,19 @@ void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy,
} }
int main(int argc, const char* argv[]) { int main(int argc, const char* argv[]) {
if (argc > 1 && std::string(argv[1]) == "--version") {
std::cout << version_string() << "\n";
return EXIT_SUCCESS;
}
SDCliParams cli_params; SDCliParams cli_params;
SDContextParams ctx_params; SDContextParams ctx_params;
SDGenerationParams gen_params; SDGenerationParams gen_params;
parse_args(argc, argv, cli_params, ctx_params, gen_params); parse_args(argc, argv, cli_params, ctx_params, gen_params);
if (cli_params.verbose || cli_params.version) {
std::cout << version_string() << "\n";
}
if (gen_params.video_frames > 4) { if (gen_params.video_frames > 4) {
size_t last_dot_pos = cli_params.preview_path.find_last_of("."); size_t last_dot_pos = cli_params.preview_path.find_last_of(".");
std::string base_path = cli_params.preview_path; std::string base_path = cli_params.preview_path;
@ -2121,6 +2265,8 @@ int main(int argc, const char* argv[]) {
if (cli_params.mode == IMG_GEN) { if (cli_params.mode == IMG_GEN) {
sd_img_gen_params_t img_gen_params = { sd_img_gen_params_t img_gen_params = {
gen_params.lora_vec.data(),
static_cast<uint32_t>(gen_params.lora_vec.size()),
gen_params.prompt.c_str(), gen_params.prompt.c_str(),
gen_params.negative_prompt.c_str(), gen_params.negative_prompt.c_str(),
gen_params.clip_skip, gen_params.clip_skip,
@ -2152,6 +2298,8 @@ int main(int argc, const char* argv[]) {
num_results = gen_params.batch_count; num_results = gen_params.batch_count;
} else if (cli_params.mode == VID_GEN) { } else if (cli_params.mode == VID_GEN) {
sd_vid_gen_params_t vid_gen_params = { sd_vid_gen_params_t vid_gen_params = {
gen_params.lora_vec.data(),
static_cast<uint32_t>(gen_params.lora_vec.size()),
gen_params.prompt.c_str(), gen_params.prompt.c_str(),
gen_params.negative_prompt.c_str(), gen_params.negative_prompt.c_str(),
gen_params.clip_skip, gen_params.clip_skip,

View File

@ -91,6 +91,41 @@ const float flux_latent_rgb_proj[16][3] = {
{-0.111849f, -0.055589f, -0.032361f}}; {-0.111849f, -0.055589f, -0.032361f}};
float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f}; float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f};
// Latent -> RGB projection used for Flux2 previews. One row per latent channel
// (32 rows), three columns holding that channel's contribution to R, G and B.
// 32 matches the 128-channel Flux2 latent divided by its 2x2 patch.
const float flux2_latent_rgb_proj[32][3] = {
{0.000736f, -0.008385f, -0.019710f},
{-0.001352f, -0.016392f, 0.020693f},
{-0.006376f, 0.002428f, 0.036736f},
{0.039384f, 0.074167f, 0.119789f},
{0.007464f, -0.005705f, -0.004734f},
{-0.004086f, 0.005287f, -0.000409f},
{-0.032835f, 0.050802f, -0.028120f},
{-0.003158f, -0.000835f, 0.000406f},
{-0.112840f, -0.084337f, -0.023083f},
{0.001462f, -0.006656f, 0.000549f},
{-0.009980f, -0.007480f, 0.009702f},
{0.032540f, 0.000214f, -0.061388f},
{0.011023f, 0.000694f, 0.007143f},
{-0.001468f, -0.006723f, -0.001678f},
{-0.005921f, -0.010320f, -0.003907f},
{-0.028434f, 0.027584f, 0.018457f},
{0.014349f, 0.011523f, 0.000441f},
{0.009874f, 0.003081f, 0.001507f},
{0.002218f, 0.005712f, 0.001563f},
{0.053010f, -0.019844f, 0.008683f},
{-0.002507f, 0.005384f, 0.000938f},
{-0.002177f, -0.011366f, 0.003559f},
{-0.000261f, 0.015121f, -0.003240f},
{-0.003944f, -0.002083f, 0.005043f},
{-0.009138f, 0.011336f, 0.003781f},
{0.011429f, 0.003985f, -0.003855f},
{0.010518f, -0.005586f, 0.010131f},
{0.007883f, 0.002912f, -0.001473f},
{-0.003318f, -0.003160f, 0.003684f},
{-0.034560f, -0.008740f, 0.012996f},
{0.000166f, 0.001079f, -0.012153f},
{0.017772f, 0.000937f, -0.011953f}};
// Per-channel (R, G, B) bias paired with the projection above; presumably added
// to the projected colour before clamping -- TODO confirm against the preview code.
float flux2_latent_rgb_bias[3] = {-0.028738f, -0.098463f, -0.107619f};
// This one was taken straight from // This one was taken straight from
// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303 // https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
// (MiT Licence) // (MiT Licence)
@ -128,16 +163,42 @@ const float sd_latent_rgb_proj[4][3] = {
{-0.178022f, -0.200862f, -0.678514f}}; {-0.178022f, -0.200862f, -0.678514f}};
float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f}; float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f};
void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int width, int height, int frames, int dim) { void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int patch_size) {
size_t buffer_head = 0; size_t buffer_head = 0;
uint32_t latent_width = latents->ne[0];
uint32_t latent_height = latents->ne[1];
uint32_t dim = latents->ne[ggml_n_dims(latents) - 1];
uint32_t frames = 1;
if (ggml_n_dims(latents) == 4) {
frames = latents->ne[2];
}
uint32_t rgb_width = latent_width * patch_size;
uint32_t rgb_height = latent_height * patch_size;
uint32_t unpatched_dim = dim / (patch_size * patch_size);
for (int k = 0; k < frames; k++) { for (int k = 0; k < frames; k++) {
for (int j = 0; j < height; j++) { for (int rgb_x = 0; rgb_x < rgb_width; rgb_x++) {
for (int i = 0; i < width; i++) { for (int rgb_y = 0; rgb_y < rgb_height; rgb_y++) {
size_t latent_id = (i * latents->nb[0] + j * latents->nb[1] + k * latents->nb[2]); int latent_x = rgb_x / patch_size;
int latent_y = rgb_y / patch_size;
int channel_offset = 0;
if (patch_size > 1) {
channel_offset = ((rgb_y % patch_size) * patch_size + (rgb_x % patch_size));
}
size_t latent_id = (latent_x * latents->nb[0] + latent_y * latents->nb[1] + k * latents->nb[2]);
// should be incremented by 1 for each pixel
size_t pixel_id = k * rgb_width * rgb_height + rgb_y * rgb_width + rgb_x;
float r = 0, g = 0, b = 0; float r = 0, g = 0, b = 0;
if (latent_rgb_proj != nullptr) { if (latent_rgb_proj != nullptr) {
for (int d = 0; d < dim; d++) { for (int d = 0; d < unpatched_dim; d++) {
float value = *(float*)((char*)latents->data + latent_id + d * latents->nb[ggml_n_dims(latents) - 1]); float value = *(float*)((char*)latents->data + latent_id + (d * patch_size * patch_size + channel_offset) * latents->nb[ggml_n_dims(latents) - 1]);
r += value * latent_rgb_proj[d][0]; r += value * latent_rgb_proj[d][0];
g += value * latent_rgb_proj[d][1]; g += value * latent_rgb_proj[d][1];
b += value * latent_rgb_proj[d][2]; b += value * latent_rgb_proj[d][2];
@ -164,9 +225,9 @@ void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const fl
g = g >= 0 ? g <= 1 ? g : 1 : 0; g = g >= 0 ? g <= 1 ? g : 1 : 0;
b = b >= 0 ? b <= 1 ? b : 1 : 0; b = b >= 0 ? b <= 1 ? b : 1 : 0;
buffer[buffer_head++] = (uint8_t)(r * 255); buffer[pixel_id * 3 + 0] = (uint8_t)(r * 255);
buffer[buffer_head++] = (uint8_t)(g * 255); buffer[pixel_id * 3 + 1] = (uint8_t)(g * 255);
buffer[buffer_head++] = (uint8_t)(b * 255); buffer[pixel_id * 3 + 2] = (uint8_t)(b * 255);
} }
} }
} }

View File

@ -937,28 +937,17 @@ public:
float multiplier, float multiplier,
ggml_backend_t backend, ggml_backend_t backend,
LoraModel::filter_t lora_tensor_filter = nullptr) { LoraModel::filter_t lora_tensor_filter = nullptr) {
std::string lora_name = lora_id; std::string lora_path = lora_id;
std::string high_noise_tag = "|high_noise|"; static std::string high_noise_tag = "|high_noise|";
bool is_high_noise = false; bool is_high_noise = false;
if (starts_with(lora_name, high_noise_tag)) { if (starts_with(lora_path, high_noise_tag)) {
lora_name = lora_name.substr(high_noise_tag.size()); lora_path = lora_path.substr(high_noise_tag.size());
is_high_noise = true; is_high_noise = true;
LOG_DEBUG("high noise lora: %s", lora_name.c_str()); LOG_DEBUG("high noise lora: %s", lora_path.c_str());
} }
std::string st_file_path = path_join(lora_model_dir, lora_name + ".safetensors"); auto lora = std::make_shared<LoraModel>(lora_id, backend, lora_path, is_high_noise ? "model.high_noise_" : "", version);
std::string ckpt_file_path = path_join(lora_model_dir, lora_name + ".ckpt");
std::string file_path;
if (file_exists(st_file_path)) {
file_path = st_file_path;
} else if (file_exists(ckpt_file_path)) {
file_path = ckpt_file_path;
} else {
LOG_WARN("can not find %s or %s for lora %s", st_file_path.c_str(), ckpt_file_path.c_str(), lora_name.c_str());
return nullptr;
}
auto lora = std::make_shared<LoraModel>(lora_id, backend, file_path, is_high_noise ? "model.high_noise_" : "", version);
if (!lora->load_from_file(n_threads, lora_tensor_filter)) { if (!lora->load_from_file(n_threads, lora_tensor_filter)) {
LOG_WARN("load lora tensors from %s failed", file_path.c_str()); LOG_WARN("load lora tensors from %s failed", lora_path.c_str());
return nullptr; return nullptr;
} }
@ -1143,12 +1132,15 @@ public:
} }
} }
std::string apply_loras_from_prompt(const std::string& prompt) { void apply_loras(const sd_lora_t* loras, uint32_t lora_count) {
auto result_pair = extract_and_remove_lora(prompt); std::unordered_map<std::string, float> lora_f2m;
std::unordered_map<std::string, float> lora_f2m = result_pair.first; // lora_name -> multiplier for (int i = 0; i < lora_count; i++) {
std::string lora_id = SAFE_STR(loras[i].path);
for (auto& kv : lora_f2m) { if (loras[i].is_high_noise) {
LOG_DEBUG("lora %s:%.2f", kv.first.c_str(), kv.second); lora_id = "|high_noise|" + lora_id;
}
lora_f2m[lora_id] = loras[i].multiplier;
LOG_DEBUG("lora %s:%.2f", lora_id.c_str(), loras[i].multiplier);
} }
int64_t t0 = ggml_time_ms(); int64_t t0 = ggml_time_ms();
if (apply_lora_immediately) { if (apply_lora_immediately) {
@ -1159,9 +1151,7 @@ public:
int64_t t1 = ggml_time_ms(); int64_t t1 = ggml_time_ms();
if (!lora_f2m.empty()) { if (!lora_f2m.empty()) {
LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000); LOG_INFO("apply_loras completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
LOG_DEBUG("prompt after extract and remove lora: \"%s\"", result_pair.second.c_str());
} }
return result_pair.second;
} }
ggml_tensor* id_encoder(ggml_context* work_ctx, ggml_tensor* id_encoder(ggml_context* work_ctx,
@ -1326,10 +1316,17 @@ public:
uint32_t dim = latents->ne[ggml_n_dims(latents) - 1]; uint32_t dim = latents->ne[ggml_n_dims(latents) - 1];
if (preview_mode == PREVIEW_PROJ) { if (preview_mode == PREVIEW_PROJ) {
int64_t patch_sz = 1;
const float(*latent_rgb_proj)[channel] = nullptr; const float(*latent_rgb_proj)[channel] = nullptr;
float* latent_rgb_bias = nullptr; float* latent_rgb_bias = nullptr;
if (dim == 48) { if (dim == 128) {
if (sd_version_is_flux2(version)) {
latent_rgb_proj = flux2_latent_rgb_proj;
latent_rgb_bias = flux2_latent_rgb_bias;
patch_sz = 2;
}
} else if (dim == 48) {
if (sd_version_is_wan(version)) { if (sd_version_is_wan(version)) {
latent_rgb_proj = wan_22_latent_rgb_proj; latent_rgb_proj = wan_22_latent_rgb_proj;
latent_rgb_bias = wan_22_latent_rgb_bias; latent_rgb_bias = wan_22_latent_rgb_bias;
@ -1382,12 +1379,15 @@ public:
frames = latents->ne[2]; frames = latents->ne[2];
} }
uint8_t* data = (uint8_t*)malloc(frames * width * height * channel * sizeof(uint8_t)); uint32_t img_width = width * patch_sz;
uint32_t img_height = height * patch_sz;
preview_latent_video(data, latents, latent_rgb_proj, latent_rgb_bias, width, height, frames, dim); uint8_t* data = (uint8_t*)malloc(frames * img_width * img_height * channel * sizeof(uint8_t));
preview_latent_video(data, latents, latent_rgb_proj, latent_rgb_bias, patch_sz);
sd_image_t* images = (sd_image_t*)malloc(frames * sizeof(sd_image_t)); sd_image_t* images = (sd_image_t*)malloc(frames * sizeof(sd_image_t));
for (int i = 0; i < frames; i++) { for (int i = 0; i < frames; i++) {
images[i] = {width, height, channel, data + i * width * height * channel}; images[i] = {img_width, img_height, channel, data + i * img_width * img_height * channel};
} }
step_callback(step, frames, images, is_noisy, step_callback_data); step_callback(step, frames, images, is_noisy, step_callback_data);
free(data); free(data);
@ -2805,8 +2805,6 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
int sample_steps = sigmas.size() - 1; int sample_steps = sigmas.size() - 1;
int64_t t0 = ggml_time_ms(); int64_t t0 = ggml_time_ms();
// Apply lora
prompt = sd_ctx->sd->apply_loras_from_prompt(prompt);
// Photo Maker // Photo Maker
std::string prompt_text_only; std::string prompt_text_only;
@ -3178,6 +3176,9 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
size_t t0 = ggml_time_ms(); size_t t0 = ggml_time_ms();
// Apply lora
sd_ctx->sd->apply_loras(sd_img_gen_params->loras, sd_img_gen_params->lora_count);
enum sample_method_t sample_method = sd_img_gen_params->sample_params.sample_method; enum sample_method_t sample_method = sd_img_gen_params->sample_params.sample_method;
if (sample_method == SAMPLE_METHOD_COUNT) { if (sample_method == SAMPLE_METHOD_COUNT) {
sample_method = sd_get_default_sample_method(sd_ctx); sample_method = sd_get_default_sample_method(sd_ctx);
@ -3477,7 +3478,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
int64_t t0 = ggml_time_ms(); int64_t t0 = ggml_time_ms();
// Apply lora // Apply lora
prompt = sd_ctx->sd->apply_loras_from_prompt(prompt); sd_ctx->sd->apply_loras(sd_vid_gen_params->loras, sd_vid_gen_params->lora_count);
ggml_tensor* init_latent = nullptr; ggml_tensor* init_latent = nullptr;
ggml_tensor* clip_vision_output = nullptr; ggml_tensor* clip_vision_output = nullptr;

View File

@ -242,6 +242,14 @@ typedef struct {
} sd_easycache_params_t; } sd_easycache_params_t;
typedef struct { typedef struct {
bool is_high_noise;
float multiplier;
const char* path;
} sd_lora_t;
typedef struct {
const sd_lora_t* loras;
uint32_t lora_count;
const char* prompt; const char* prompt;
const char* negative_prompt; const char* negative_prompt;
int clip_skip; int clip_skip;
@ -265,6 +273,8 @@ typedef struct {
} sd_img_gen_params_t; } sd_img_gen_params_t;
typedef struct { typedef struct {
const sd_lora_t* loras;
uint32_t lora_count;
const char* prompt; const char* prompt;
const char* negative_prompt; const char* negative_prompt;
int clip_skip; int clip_skip;
@ -359,6 +369,9 @@ SD_API bool preprocess_canny(sd_image_t image,
float strong, float strong,
bool inverse); bool inverse);
// Build identification, fixed at compile time (implemented in version.cpp).
// Both return a pointer to a string literal, so the caller must not free or
// modify it. The text is "unknown" when the build did not supply the value.
// sd_commit(): abbreviated git commit hash the library was built from.
SD_API const char* sd_commit(void);
// sd_version(): `git describe`-style version string of the build.
SD_API const char* sd_version(void);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -95,20 +95,6 @@ bool is_directory(const std::string& path) {
return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY)); return (attributes != INVALID_FILE_ATTRIBUTES && (attributes & FILE_ATTRIBUTE_DIRECTORY));
} }
std::string get_full_path(const std::string& dir, const std::string& filename) {
std::string full_path = dir + "\\" + filename;
WIN32_FIND_DATA find_file_data;
HANDLE hFind = FindFirstFile(full_path.c_str(), &find_file_data);
if (hFind != INVALID_HANDLE_VALUE) {
FindClose(hFind);
return full_path;
} else {
return "";
}
}
#else // Unix #else // Unix
#include <dirent.h> #include <dirent.h>
#include <sys/stat.h> #include <sys/stat.h>
@ -123,26 +109,6 @@ bool is_directory(const std::string& path) {
return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode)); return (stat(path.c_str(), &buffer) == 0 && S_ISDIR(buffer.st_mode));
} }
// TODO: add windows version
// Looks for an entry of `dir` whose name equals `filename` ignoring case and
// returns "<dir>/<entry name as stored on disk>" for the first one found.
// Returns "" when `dir` cannot be opened or no entry matches.
std::string get_full_path(const std::string& dir, const std::string& filename) {
    std::string resolved;

    DIR* dp = opendir(dir.c_str());
    if (dp == nullptr) {
        return resolved;
    }

    for (struct dirent* entry = readdir(dp); entry != nullptr; entry = readdir(dp)) {
        if (strcasecmp(entry->d_name, filename.c_str()) == 0) {
            // Build the result BEFORE closedir(): the dirent returned by
            // readdir() belongs to the directory stream and must not be read
            // once the stream has been closed.
            resolved = dir + "/" + entry->d_name;
            break;
        }
    }

    closedir(dp);
    return resolved;
}
#endif #endif
// get_num_physical_cores is copy from // get_num_physical_cores is copy from

1
util.h
View File

@ -22,7 +22,6 @@ int round_up_to(int value, int base);
bool file_exists(const std::string& filename); bool file_exists(const std::string& filename);
bool is_directory(const std::string& path); bool is_directory(const std::string& path);
std::string get_full_path(const std::string& dir, const std::string& filename);
std::u32string utf8_to_utf32(const std::string& utf8_str); std::u32string utf8_to_utf32(const std::string& utf8_str);
std::string utf32_to_utf8(const std::u32string& utf32_str); std::string utf32_to_utf8(const std::u32string& utf32_str);

20
version.cpp Normal file
View File

@ -0,0 +1,20 @@
#include "stable-diffusion.h"
// Fallbacks for builds where the build system did not inject git metadata.
#if !defined(SDCPP_BUILD_COMMIT)
#define SDCPP_BUILD_COMMIT unknown
#endif

#if !defined(SDCPP_BUILD_VERSION)
#define SDCPP_BUILD_VERSION unknown
#endif

// Two-step stringification: the outer macro expands its argument first, so the
// VALUE of SDCPP_BUILD_* becomes the string, not the macro's own name.
#define SDCPP_QUOTE_TOKENS(tokens) #tokens
#define SDCPP_QUOTE(value) SDCPP_QUOTE_TOKENS(value)

static const char k_build_commit[]  = SDCPP_QUOTE(SDCPP_BUILD_COMMIT);
static const char k_build_version[] = SDCPP_QUOTE(SDCPP_BUILD_VERSION);

// Commit identifier this library was built from ("unknown" if not provided).
const char* sd_commit(void) {
    return k_build_commit;
}

// Version string this library was built as ("unknown" if not provided).
const char* sd_version(void) {
    return k_build_version;
}