style: format code

This commit is contained in:
leejet 2025-06-29 23:40:55 +08:00
parent b1cc40c35c
commit 45d0ebb30c
11 changed files with 92 additions and 99 deletions

View File

@ -1224,14 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
T5UniGramTokenizer t5_tokenizer; T5UniGramTokenizer t5_tokenizer;
std::shared_ptr<T5Runner> t5; std::shared_ptr<T5Runner> t5;
size_t chunk_len = 512; size_t chunk_len = 512;
bool use_mask = false; bool use_mask = false;
int mask_pad = 1; int mask_pad = 1;
PixArtCLIPEmbedder(ggml_backend_t backend, PixArtCLIPEmbedder(ggml_backend_t backend,
std::map<std::string, enum ggml_type>& tensor_types, std::map<std::string, enum ggml_type>& tensor_types,
int clip_skip = -1, int clip_skip = -1,
bool use_mask = false, bool use_mask = false,
int mask_pad = 1) : use_mask(use_mask), mask_pad(mask_pad) { int mask_pad = 1)
: use_mask(use_mask), mask_pad(mask_pad) {
t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer"); t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
} }

View File

@ -1019,7 +1019,7 @@ static void sample_k_diffusion(sample_method_t method,
// also needed to invert the behavior of CompVisDenoiser // also needed to invert the behavior of CompVisDenoiser
// (k-diffusion's LMSDiscreteScheduler) // (k-diffusion's LMSDiscreteScheduler)
float beta_start = 0.00085f; float beta_start = 0.00085f;
float beta_end = 0.0120f; float beta_end = 0.0120f;
std::vector<double> alphas_cumprod; std::vector<double> alphas_cumprod;
std::vector<double> compvis_sigmas; std::vector<double> compvis_sigmas;
@ -1030,8 +1030,9 @@ static void sample_k_diffusion(sample_method_t method,
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) * (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f - (1.0f -
std::pow(sqrtf(beta_start) + std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) * (sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)), 2)); ((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] = compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) / std::sqrt((1 - alphas_cumprod[i]) /
alphas_cumprod[i]); alphas_cumprod[i]);
@ -1061,7 +1062,8 @@ static void sample_k_diffusion(sample_method_t method,
// - pred_prev_sample -> "x_t-1" // - pred_prev_sample -> "x_t-1"
int timestep = int timestep =
roundf(TIMESTEPS - roundf(TIMESTEPS -
i * ((float)TIMESTEPS / steps)) - 1; i * ((float)TIMESTEPS / steps)) -
1;
// 1. get previous step value (=t-1) // 1. get previous step value (=t-1)
int prev_timestep = timestep - TIMESTEPS / steps; int prev_timestep = timestep - TIMESTEPS / steps;
// The sigma here is chosen to cause the // The sigma here is chosen to cause the
@ -1086,10 +1088,9 @@ static void sample_k_diffusion(sample_method_t method,
float* vec_x = (float*)x->data; float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) { for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1) / vec_x[j] *= std::sqrt(sigma * sigma + 1) /
sigma; sigma;
} }
} } else {
else {
// For the subsequent steps after the first one, // For the subsequent steps after the first one,
// at this point x = latents or x = sample, and // at this point x = latents or x = sample, and
// needs to be prescaled with x <- sample / c_in // needs to be prescaled with x <- sample / c_in
@ -1127,9 +1128,8 @@ static void sample_k_diffusion(sample_method_t method,
float alpha_prod_t = alphas_cumprod[timestep]; float alpha_prod_t = alphas_cumprod[timestep];
// Note final_alpha_cumprod = alphas_cumprod[0] due to // Note final_alpha_cumprod = alphas_cumprod[0] due to
// trailing timestep spacing // trailing timestep spacing
float alpha_prod_t_prev = prev_timestep >= 0 ? float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
alphas_cumprod[prev_timestep] : alphas_cumprod[0]; float beta_prod_t = 1 - alpha_prod_t;
float beta_prod_t = 1 - alpha_prod_t;
// 3. compute predicted original sample from predicted // 3. compute predicted original sample from predicted
// noise also called "predicted x_0" of formula (12) // noise also called "predicted x_0" of formula (12)
// from https://arxiv.org/pdf/2010.02502.pdf // from https://arxiv.org/pdf/2010.02502.pdf
@ -1145,7 +1145,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_pred_original_sample[j] = vec_pred_original_sample[j] =
(vec_x[j] / std::sqrt(sigma * sigma + 1) - (vec_x[j] / std::sqrt(sigma * sigma + 1) -
std::sqrt(beta_prod_t) * std::sqrt(beta_prod_t) *
vec_model_output[j]) * vec_model_output[j]) *
(1 / std::sqrt(alpha_prod_t)); (1 / std::sqrt(alpha_prod_t));
} }
} }
@ -1159,8 +1159,8 @@ static void sample_k_diffusion(sample_method_t method,
// sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) * // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
// sqrt(1 - alpha_t/alpha_t-1) // sqrt(1 - alpha_t/alpha_t-1)
float beta_prod_t_prev = 1 - alpha_prod_t_prev; float beta_prod_t_prev = 1 - alpha_prod_t_prev;
float variance = (beta_prod_t_prev / beta_prod_t) * float variance = (beta_prod_t_prev / beta_prod_t) *
(1 - alpha_prod_t / alpha_prod_t_prev); (1 - alpha_prod_t / alpha_prod_t_prev);
float std_dev_t = eta * std::sqrt(variance); float std_dev_t = eta * std::sqrt(variance);
// 6. compute "direction pointing to x_t" of formula // 6. compute "direction pointing to x_t" of formula
// (12) from https://arxiv.org/pdf/2010.02502.pdf // (12) from https://arxiv.org/pdf/2010.02502.pdf
@ -1179,8 +1179,8 @@ static void sample_k_diffusion(sample_method_t method,
std::pow(std_dev_t, 2)) * std::pow(std_dev_t, 2)) *
vec_model_output[j]; vec_model_output[j];
vec_x[j] = std::sqrt(alpha_prod_t_prev) * vec_x[j] = std::sqrt(alpha_prod_t_prev) *
vec_pred_original_sample[j] + vec_pred_original_sample[j] +
pred_sample_direction; pred_sample_direction;
} }
} }
if (eta > 0) { if (eta > 0) {
@ -1208,7 +1208,7 @@ static void sample_k_diffusion(sample_method_t method,
// by Semi-Linear Consistency Function with Trajectory // by Semi-Linear Consistency Function with Trajectory
// Mapping", arXiv:2402.19159 [cs.CV] // Mapping", arXiv:2402.19159 [cs.CV]
float beta_start = 0.00085f; float beta_start = 0.00085f;
float beta_end = 0.0120f; float beta_end = 0.0120f;
std::vector<double> alphas_cumprod; std::vector<double> alphas_cumprod;
std::vector<double> compvis_sigmas; std::vector<double> compvis_sigmas;
@ -1219,8 +1219,9 @@ static void sample_k_diffusion(sample_method_t method,
(i == 0 ? 1.0f : alphas_cumprod[i - 1]) * (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
(1.0f - (1.0f -
std::pow(sqrtf(beta_start) + std::pow(sqrtf(beta_start) +
(sqrtf(beta_end) - sqrtf(beta_start)) * (sqrtf(beta_end) - sqrtf(beta_start)) *
((float)i / (TIMESTEPS - 1)), 2)); ((float)i / (TIMESTEPS - 1)),
2));
compvis_sigmas[i] = compvis_sigmas[i] =
std::sqrt((1 - alphas_cumprod[i]) / std::sqrt((1 - alphas_cumprod[i]) /
alphas_cumprod[i]); alphas_cumprod[i]);
@ -1235,13 +1236,10 @@ static void sample_k_diffusion(sample_method_t method,
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
// Analytic form for TCD timesteps // Analytic form for TCD timesteps
int timestep = TIMESTEPS - 1 - int timestep = TIMESTEPS - 1 -
(TIMESTEPS / original_steps) * (TIMESTEPS / original_steps) *
(int)floor(i * ((float)original_steps / steps)); (int)floor(i * ((float)original_steps / steps));
// 1. get previous step value // 1. get previous step value
int prev_timestep = i >= steps - 1 ? 0 : int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
(int)floor((i + 1) *
((float)original_steps / steps));
// Here timestep_s is tau_n' in Algorithm 4. The _s // Here timestep_s is tau_n' in Algorithm 4. The _s
// notation appears to be that from C. Lu, // notation appears to be that from C. Lu,
// "DPM-Solver: A Fast ODE Solver for Diffusion // "DPM-Solver: A Fast ODE Solver for Diffusion
@ -1258,10 +1256,9 @@ static void sample_k_diffusion(sample_method_t method,
float* vec_x = (float*)x->data; float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) { for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1) / vec_x[j] *= std::sqrt(sigma * sigma + 1) /
sigma; sigma;
} }
} } else {
else {
float* vec_x = (float*)x->data; float* vec_x = (float*)x->data;
for (int j = 0; j < ggml_nelements(x); j++) { for (int j = 0; j < ggml_nelements(x); j++) {
vec_x[j] *= std::sqrt(sigma * sigma + 1); vec_x[j] *= std::sqrt(sigma * sigma + 1);
@ -1294,15 +1291,14 @@ static void sample_k_diffusion(sample_method_t method,
// DPM-Solver. In fact, we have alpha_{t_n} = // DPM-Solver. In fact, we have alpha_{t_n} =
// \sqrt{\hat{alpha_n}}, [...]" // \sqrt{\hat{alpha_n}}, [...]"
float alpha_prod_t = alphas_cumprod[timestep]; float alpha_prod_t = alphas_cumprod[timestep];
float beta_prod_t = 1 - alpha_prod_t; float beta_prod_t = 1 - alpha_prod_t;
// Note final_alpha_cumprod = alphas_cumprod[0] since // Note final_alpha_cumprod = alphas_cumprod[0] since
// TCD is always "trailing" // TCD is always "trailing"
float alpha_prod_t_prev = prev_timestep >= 0 ? float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
alphas_cumprod[prev_timestep] : alphas_cumprod[0];
// The subscript _s are the only portion in this // The subscript _s are the only portion in this
// section (2) unique to TCD // section (2) unique to TCD
float alpha_prod_s = alphas_cumprod[timestep_s]; float alpha_prod_s = alphas_cumprod[timestep_s];
float beta_prod_s = 1 - alpha_prod_s; float beta_prod_s = 1 - alpha_prod_s;
// 3. Compute the predicted noised sample x_s based on // 3. Compute the predicted noised sample x_s based on
// the model parameterization // the model parameterization
// //
@ -1317,7 +1313,7 @@ static void sample_k_diffusion(sample_method_t method,
vec_pred_original_sample[j] = vec_pred_original_sample[j] =
(vec_x[j] / std::sqrt(sigma * sigma + 1) - (vec_x[j] / std::sqrt(sigma * sigma + 1) -
std::sqrt(beta_prod_t) * std::sqrt(beta_prod_t) *
vec_model_output[j]) * vec_model_output[j]) *
(1 / std::sqrt(alpha_prod_t)); (1 / std::sqrt(alpha_prod_t));
} }
} }
@ -1339,9 +1335,9 @@ static void sample_k_diffusion(sample_method_t method,
// pred_epsilon = model_output // pred_epsilon = model_output
vec_x[j] = vec_x[j] =
std::sqrt(alpha_prod_s) * std::sqrt(alpha_prod_s) *
vec_pred_original_sample[j] + vec_pred_original_sample[j] +
std::sqrt(beta_prod_s) * std::sqrt(beta_prod_s) *
vec_model_output[j]; vec_model_output[j];
} }
} }
// 4. Sample and inject noise z ~ N(0, I) for // 4. Sample and inject noise z ~ N(0, I) for
@ -1357,7 +1353,7 @@ static void sample_k_diffusion(sample_method_t method,
// In this case, x is still pred_noised_sample, // In this case, x is still pred_noised_sample,
// continue in-place // continue in-place
ggml_tensor_set_f32_randn(noise, rng); ggml_tensor_set_f32_randn(noise, rng);
float* vec_x = (float*)x->data; float* vec_x = (float*)x->data;
float* vec_noise = (float*)noise->data; float* vec_noise = (float*)noise->data;
for (int j = 0; j < ggml_nelements(x); j++) { for (int j = 0; j < ggml_nelements(x); j++) {
// Corresponding to (35) in Zheng et // Corresponding to (35) in Zheng et
@ -1366,10 +1362,10 @@ static void sample_k_diffusion(sample_method_t method,
vec_x[j] = vec_x[j] =
std::sqrt(alpha_prod_t_prev / std::sqrt(alpha_prod_t_prev /
alpha_prod_s) * alpha_prod_s) *
vec_x[j] + vec_x[j] +
std::sqrt(1 - alpha_prod_t_prev / std::sqrt(1 - alpha_prod_t_prev /
alpha_prod_s) * alpha_prod_s) *
vec_noise[j]; vec_noise[j];
} }
} }
} }

View File

@ -13,7 +13,7 @@ struct DiffusionModel {
struct ggml_tensor* c_concat, struct ggml_tensor* c_concat,
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1, int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {}, std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f, float control_strength = 0.f,
@ -69,7 +69,7 @@ struct UNetModel : public DiffusionModel {
struct ggml_tensor* c_concat, struct ggml_tensor* c_concat,
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1, int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {}, std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f, float control_strength = 0.f,
@ -120,7 +120,7 @@ struct MMDiTModel : public DiffusionModel {
struct ggml_tensor* c_concat, struct ggml_tensor* c_concat,
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1, int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {}, std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f, float control_strength = 0.f,
@ -173,7 +173,7 @@ struct FluxModel : public DiffusionModel {
struct ggml_tensor* c_concat, struct ggml_tensor* c_concat,
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
int num_video_frames = -1, int num_video_frames = -1,
std::vector<struct ggml_tensor*> controls = {}, std::vector<struct ggml_tensor*> controls = {},
float control_strength = 0.f, float control_strength = 0.f,

View File

@ -133,9 +133,9 @@ struct SDParams {
float skip_layer_start = 0.01f; float skip_layer_start = 0.01f;
float skip_layer_end = 0.2f; float skip_layer_end = 0.2f;
bool chroma_use_dit_mask = true; bool chroma_use_dit_mask = true;
bool chroma_use_t5_mask = false; bool chroma_use_t5_mask = false;
int chroma_t5_mask_pad = 1; int chroma_t5_mask_pad = 1;
}; };
void print_params(SDParams params) { void print_params(SDParams params) {
@ -919,7 +919,7 @@ int main(int argc, const char* argv[]) {
input_image_buffer = resized_image_buffer; input_image_buffer = resized_image_buffer;
} }
} else if (params.mode == EDIT) { } else if (params.mode == EDIT) {
vae_decode_only = false; vae_decode_only = false;
for (auto& path : params.ref_image_paths) { for (auto& path : params.ref_image_paths) {
int c = 0; int c = 0;
int width = 0; int width = 0;
@ -1113,7 +1113,7 @@ int main(int argc, const char* argv[]) {
params.skip_layer_start, params.skip_layer_start,
params.skip_layer_end); params.skip_layer_end);
} }
} else { // EDIT } else { // EDIT
results = edit(sd_ctx, results = edit(sd_ctx,
ref_images.data(), ref_images.data(),
ref_images.size(), ref_images.size(),
@ -1176,11 +1176,11 @@ int main(int argc, const char* argv[]) {
std::string dummy_name, ext, lc_ext; std::string dummy_name, ext, lc_ext;
bool is_jpg; bool is_jpg;
size_t last = params.output_path.find_last_of("."); size_t last = params.output_path.find_last_of(".");
size_t last_path = std::min(params.output_path.find_last_of("/"), size_t last_path = std::min(params.output_path.find_last_of("/"),
params.output_path.find_last_of("\\")); params.output_path.find_last_of("\\"));
if (last != std::string::npos // filename has extension if (last != std::string::npos // filename has extension
&& (last_path == std::string::npos || last > last_path)) { && (last_path == std::string::npos || last > last_path)) {
dummy_name = params.output_path.substr(0, last); dummy_name = params.output_path.substr(0, last);
ext = lc_ext = params.output_path.substr(last); ext = lc_ext = params.output_path.substr(last);
std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower); std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
@ -1188,7 +1188,7 @@ int main(int argc, const char* argv[]) {
} else { } else {
dummy_name = params.output_path; dummy_name = params.output_path;
ext = lc_ext = ""; ext = lc_ext = "";
is_jpg = false; is_jpg = false;
} }
// appending ".png" to absent or unknown extension // appending ".png" to absent or unknown extension
if (!is_jpg && lc_ext != ".png") { if (!is_jpg && lc_ext != ".png") {
@ -1200,7 +1200,7 @@ int main(int argc, const char* argv[]) {
continue; continue;
} }
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext; std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
if(is_jpg) { if (is_jpg) {
stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel, stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
results[i].data, 90, get_image_params(params, params.seed + i).c_str()); results[i].data, 90, get_image_params(params, params.seed + i).c_str());
printf("save result JPEG image to '%s'\n", final_image_path.c_str()); printf("save result JPEG image to '%s'\n", final_image_path.c_str());

View File

@ -512,7 +512,8 @@ namespace Flux {
LastLayer(int64_t hidden_size, LastLayer(int64_t hidden_size,
int64_t patch_size, int64_t patch_size,
int64_t out_channels, int64_t out_channels,
bool prune_mod = false) : prune_mod(prune_mod) { bool prune_mod = false)
: prune_mod(prune_mod) {
blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false)); blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels)); blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
if (!prune_mod) { if (!prune_mod) {
@ -723,7 +724,7 @@ namespace Flux {
auto txt_ids = gen_txt_ids(bs, context_len); auto txt_ids = gen_txt_ids(bs, context_len);
auto img_ids = gen_img_ids(h, w, patch_size, bs); auto img_ids = gen_img_ids(h, w, patch_size, bs);
auto ids = concat_ids(txt_ids, img_ids, bs); auto ids = concat_ids(txt_ids, img_ids, bs);
uint64_t curr_h_offset = 0; uint64_t curr_h_offset = 0;
uint64_t curr_w_offset = 0; uint64_t curr_w_offset = 0;
for (ggml_tensor* ref : ref_latents) { for (ggml_tensor* ref : ref_latents) {
@ -736,7 +737,7 @@ namespace Flux {
} }
auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset); auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
ids = concat_ids(ids, ref_ids, bs); ids = concat_ids(ids, ref_ids, bs);
curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset); curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset); curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
@ -744,7 +745,6 @@ namespace Flux {
return ids; return ids;
} }
// Generate positional embeddings // Generate positional embeddings
std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) { std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents); std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
@ -872,8 +872,8 @@ namespace Flux {
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
struct ggml_tensor* pe, struct ggml_tensor* pe,
struct ggml_tensor* mod_index_arange = NULL, struct ggml_tensor* mod_index_arange = NULL,
std::vector<int> skip_layers = {}) { std::vector<int> skip_layers = {}) {
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]); auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]); auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]); auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]);
@ -962,7 +962,6 @@ namespace Flux {
struct ggml_tensor* process_img(struct ggml_context* ctx, struct ggml_tensor* process_img(struct ggml_context* ctx,
struct ggml_tensor* x) { struct ggml_tensor* x) {
int64_t W = x->ne[0]; int64_t W = x->ne[0];
int64_t H = x->ne[1]; int64_t H = x->ne[1];
int64_t patch_size = 2; int64_t patch_size = 2;
@ -983,9 +982,9 @@ namespace Flux {
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
struct ggml_tensor* pe, struct ggml_tensor* pe,
struct ggml_tensor* mod_index_arange = NULL, struct ggml_tensor* mod_index_arange = NULL,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}) { std::vector<int> skip_layers = {}) {
// Forward pass of DiT. // Forward pass of DiT.
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) // x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
// timestep: (N,) tensor of diffusion timesteps // timestep: (N,) tensor of diffusion timesteps
@ -1005,7 +1004,7 @@ namespace Flux {
int pad_h = (patch_size - H % patch_size) % patch_size; int pad_h = (patch_size - H % patch_size) % patch_size;
int pad_w = (patch_size - W % patch_size) % patch_size; int pad_w = (patch_size - W % patch_size) % patch_size;
auto img = process_img(ctx, x); auto img = process_img(ctx, x);
uint64_t img_tokens = img->ne[1]; uint64_t img_tokens = img->ne[1];
if (c_concat != NULL) { if (c_concat != NULL) {
@ -1013,7 +1012,7 @@ namespace Flux {
ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C); ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
masked = process_img(ctx, masked); masked = process_img(ctx, masked);
mask = process_img(ctx, mask); mask = process_img(ctx, mask);
img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0); img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
} }
@ -1027,9 +1026,9 @@ namespace Flux {
auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size] auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers); // [N, num_tokens, C * patch_size * patch_size]
if (out->ne[1] > img_tokens) { if (out->ne[1] > img_tokens) {
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size] out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0); out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size] out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
} }
// rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2) // rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)
@ -1120,7 +1119,7 @@ namespace Flux {
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}) { std::vector<int> skip_layers = {}) {
GGML_ASSERT(x->ne[3] == 1); GGML_ASSERT(x->ne[3] == 1);
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false); struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
@ -1139,8 +1138,8 @@ namespace Flux {
} }
// ggml_arange is not working on some backends, precompute it // ggml_arange is not working on some backends, precompute it
mod_index_arange_vec = arange(0, 344); mod_index_arange_vec = arange(0, 344);
mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size()); mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data()); set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
} }
y = to_backend(y); y = to_backend(y);
@ -1187,9 +1186,9 @@ namespace Flux {
struct ggml_tensor* y, struct ggml_tensor* y,
struct ggml_tensor* guidance, struct ggml_tensor* guidance,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
struct ggml_tensor** output = NULL, struct ggml_tensor** output = NULL,
struct ggml_context* output_ctx = NULL, struct ggml_context* output_ctx = NULL,
std::vector<int> skip_layers = std::vector<int>()) { std::vector<int> skip_layers = std::vector<int>()) {
// x: [N, in_channels, h, w] // x: [N, in_channels, h, w]
// timesteps: [N, ] // timesteps: [N, ]
// context: [N, max_position, hidden_size] // context: [N, max_position, hidden_size]

View File

@ -291,7 +291,6 @@ struct LoraModel : public GGMLRunner {
std::string hada_2_down_name = ""; std::string hada_2_down_name = "";
std::string hada_2_up_name = ""; std::string hada_2_up_name = "";
hada_1_down_name = fk + ".hada_w1_b"; hada_1_down_name = fk + ".hada_w1_b";
hada_1_up_name = fk + ".hada_w1_a"; hada_1_up_name = fk + ".hada_w1_a";
hada_1_mid_name = fk + ".hada_t1"; hada_1_mid_name = fk + ".hada_t1";

View File

@ -12,9 +12,9 @@
#include "ggml-backend.h" #include "ggml-backend.h"
#include "ggml.h" #include "ggml.h"
#include "gguf.h"
#include "json.hpp" #include "json.hpp"
#include "zip.h" #include "zip.h"
#include "gguf.h"
#define SD_MAX_DIMS 5 #define SD_MAX_DIMS 5

View File

@ -48,8 +48,7 @@ const char* sampling_methods_str[] = {
"iPNDM_v", "iPNDM_v",
"LCM", "LCM",
"DDIM \"trailing\"", "DDIM \"trailing\"",
"TCD" "TCD"};
};
/*================================================== Helper Functions ================================================*/ /*================================================== Helper Functions ================================================*/
@ -815,11 +814,11 @@ public:
int start_merge_step, int start_merge_step,
SDCondition id_cond, SDCondition id_cond,
std::vector<ggml_tensor*> ref_latents = {}, std::vector<ggml_tensor*> ref_latents = {},
std::vector<int> skip_layers = {}, std::vector<int> skip_layers = {},
float slg_scale = 0, float slg_scale = 0,
float skip_layer_start = 0.01, float skip_layer_start = 0.01,
float skip_layer_end = 0.2, float skip_layer_end = 0.2,
ggml_tensor* noise_mask = nullptr) { ggml_tensor* noise_mask = nullptr) {
LOG_DEBUG("Sample"); LOG_DEBUG("Sample");
struct ggml_init_params params; struct ggml_init_params params;
size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]); size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@ -1973,7 +1972,6 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
return result_images; return result_images;
} }
sd_image_t* edit(sd_ctx_t* sd_ctx, sd_image_t* edit(sd_ctx_t* sd_ctx,
sd_image_t* ref_images, sd_image_t* ref_images,
int ref_images_count, int ref_images_count,

View File

@ -61,10 +61,10 @@ enum schedule_t {
// same as enum ggml_type // same as enum ggml_type
enum sd_type_t { enum sd_type_t {
SD_TYPE_F32 = 0, SD_TYPE_F32 = 0,
SD_TYPE_F16 = 1, SD_TYPE_F16 = 1,
SD_TYPE_Q4_0 = 2, SD_TYPE_Q4_0 = 2,
SD_TYPE_Q4_1 = 3, SD_TYPE_Q4_1 = 3,
// SD_TYPE_Q4_2 = 4, support has been removed // SD_TYPE_Q4_2 = 4, support has been removed
// SD_TYPE_Q4_3 = 5, support has been removed // SD_TYPE_Q4_3 = 5, support has been removed
SD_TYPE_Q5_0 = 6, SD_TYPE_Q5_0 = 6,
@ -95,12 +95,12 @@ enum sd_type_t {
// SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files // SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
// SD_TYPE_Q4_0_4_8 = 32, // SD_TYPE_Q4_0_4_8 = 32,
// SD_TYPE_Q4_0_8_8 = 33, // SD_TYPE_Q4_0_8_8 = 33,
SD_TYPE_TQ1_0 = 34, SD_TYPE_TQ1_0 = 34,
SD_TYPE_TQ2_0 = 35, SD_TYPE_TQ2_0 = 35,
// SD_TYPE_IQ4_NL_4_4 = 36, // SD_TYPE_IQ4_NL_4_4 = 36,
// SD_TYPE_IQ4_NL_4_8 = 37, // SD_TYPE_IQ4_NL_4_8 = 37,
// SD_TYPE_IQ4_NL_8_8 = 38, // SD_TYPE_IQ4_NL_8_8 = 38,
SD_TYPE_COUNT = 39, SD_TYPE_COUNT = 39,
}; };
SD_API const char* sd_type_name(enum sd_type_t type); SD_API const char* sd_type_name(enum sd_type_t type);

2
t5.hpp
View File

@ -797,7 +797,7 @@ struct T5Runner : public GGMLRunner {
struct ggml_tensor* input_ids, struct ggml_tensor* input_ids,
struct ggml_tensor* attention_mask, struct ggml_tensor* attention_mask,
ggml_tensor** output, ggml_tensor** output,
ggml_context* output_ctx = NULL) { ggml_context* output_ctx = NULL) {
auto get_graph = [&]() -> struct ggml_cgraph* { auto get_graph = [&]() -> struct ggml_cgraph* {
return build_graph(input_ids, attention_mask); return build_graph(input_ids, attention_mask);
}; };

View File

@ -112,7 +112,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str()); sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
// Find the first file in the directory // Find the first file in the directory
hFind = FindFirstFile(directoryPath, &findFileData); hFind = FindFirstFile(directoryPath, &findFileData);
bool isAbsolutePath = false; bool isAbsolutePath = false;
// Check if the directory was found // Check if the directory was found
if (hFind == INVALID_HANDLE_VALUE) { if (hFind == INVALID_HANDLE_VALUE) {
@ -121,7 +121,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
char directoryPathAbsolute[MAX_PATH]; char directoryPathAbsolute[MAX_PATH];
sprintf(directoryPathAbsolute, "%s*", dir.c_str()); sprintf(directoryPathAbsolute, "%s*", dir.c_str());
hFind = FindFirstFile(directoryPathAbsolute, &findFileData); hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
isAbsolutePath = true; isAbsolutePath = true;
if (hFind == INVALID_HANDLE_VALUE) { if (hFind == INVALID_HANDLE_VALUE) {
printf("Absolute path was also wrong.\n"); printf("Absolute path was also wrong.\n");