Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2025-12-12 13:28:37 +00:00)
style: format code
commit 45d0ebb30c (parent b1cc40c35c)
@@ -1224,14 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
    T5UniGramTokenizer t5_tokenizer;
    std::shared_ptr<T5Runner> t5;
    size_t chunk_len = 512;
    bool use_mask = false;
    int mask_pad = 1;

    PixArtCLIPEmbedder(ggml_backend_t backend,
                       std::map<std::string, enum ggml_type>& tensor_types,
                       int clip_skip = -1,
                       bool use_mask = false,
-                      int mask_pad = 1) : use_mask(use_mask), mask_pad(mask_pad) {
+                      int mask_pad = 1)
+        : use_mask(use_mask), mask_pad(mask_pad) {
        t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
    }

denoiser.hpp (72 changed lines)
@@ -1019,7 +1019,7 @@ static void sample_k_diffusion(sample_method_t method,
            // also needed to invert the behavior of CompVisDenoiser
            // (k-diffusion's LMSDiscreteScheduler)
            float beta_start = 0.00085f;
            float beta_end = 0.0120f;
            std::vector<double> alphas_cumprod;
            std::vector<double> compvis_sigmas;

@@ -1030,8 +1030,9 @@ static void sample_k_diffusion(sample_method_t method,
                    (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
                    (1.0f -
                     std::pow(sqrtf(beta_start) +
-                                 (sqrtf(beta_end) - sqrtf(beta_start)) *
-                                 ((float)i / (TIMESTEPS - 1)), 2));
+                                 (sqrtf(beta_end) - sqrtf(beta_start)) *
+                                     ((float)i / (TIMESTEPS - 1)),
+                             2));
                compvis_sigmas[i] =
                    std::sqrt((1 - alphas_cumprod[i]) /
                              alphas_cumprod[i]);
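The two hunks above only re-wrap the scaled-linear noise schedule; as a reference, here is a minimal standalone sketch of the same computation, kept outside of ggml so it compiles on its own (the TIMESTEPS value of 1000 is an assumption):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        const int TIMESTEPS    = 1000;      // assumed value of the TIMESTEPS constant
        const float beta_start = 0.00085f;
        const float beta_end   = 0.0120f;

        std::vector<double> alphas_cumprod(TIMESTEPS);
        std::vector<double> compvis_sigmas(TIMESTEPS);
        for (int i = 0; i < TIMESTEPS; i++) {
            // "scaled linear" schedule: beta is linear in sqrt-space
            float sqrt_beta = sqrtf(beta_start) +
                              (sqrtf(beta_end) - sqrtf(beta_start)) * ((float)i / (TIMESTEPS - 1));
            // alpha_bar_i = prod_{k <= i} (1 - beta_k)
            alphas_cumprod[i] = (i == 0 ? 1.0 : alphas_cumprod[i - 1]) * (1.0 - sqrt_beta * sqrt_beta);
            // CompVis/LDM convention: sigma_i = sqrt((1 - alpha_bar_i) / alpha_bar_i)
            compvis_sigmas[i] = std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]);
        }
        printf("sigma[0] = %f, sigma[%d] = %f\n",
               compvis_sigmas[0], TIMESTEPS - 1, compvis_sigmas[TIMESTEPS - 1]);
        return 0;
    }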
@@ -1061,7 +1062,8 @@ static void sample_k_diffusion(sample_method_t method,
                // - pred_prev_sample -> "x_t-1"
                int timestep =
                    roundf(TIMESTEPS -
-                          i * ((float)TIMESTEPS / steps)) - 1;
+                          i * ((float)TIMESTEPS / steps)) -
+                    1;
                // 1. get previous step value (=t-1)
                int prev_timestep = timestep - TIMESTEPS / steps;
                // The sigma here is chosen to cause the
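The timestep arithmetic above implements "trailing" spacing: start at TIMESTEPS - 1 and walk down in strides of TIMESTEPS / steps. A minimal sketch of just that indexing, again assuming TIMESTEPS is 1000; for steps = 10 it prints 999, 899, ..., 99, with prev_timestep one stride behind (it goes negative on the last step, which the sampler handles by falling back to alphas_cumprod[0]):

    #include <cmath>
    #include <cstdio>

    int main() {
        const int TIMESTEPS = 1000;  // assumed constant
        const int steps     = 10;
        for (int i = 0; i < steps; i++) {
            // trailing spacing: start at TIMESTEPS - 1 and step down by TIMESTEPS / steps
            int timestep      = (int)std::roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps)) - 1;
            int prev_timestep = timestep - TIMESTEPS / steps;
            printf("i=%d timestep=%d prev=%d\n", i, timestep, prev_timestep);
        }
        return 0;
    }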
@@ -1086,10 +1088,9 @@ static void sample_k_diffusion(sample_method_t method,
                    float* vec_x = (float*)x->data;
                    for (int j = 0; j < ggml_nelements(x); j++) {
                        vec_x[j] *= std::sqrt(sigma * sigma + 1) /
                                    sigma;
                    }
-                }
-                else {
+                } else {
                    // For the subsequent steps after the first one,
                    // at this point x = latents or x = sample, and
                    // needs to be prescaled with x <- sample / c_in

@@ -1127,9 +1128,8 @@ static void sample_k_diffusion(sample_method_t method,
                float alpha_prod_t = alphas_cumprod[timestep];
                // Note final_alpha_cumprod = alphas_cumprod[0] due to
                // trailing timestep spacing
-                float alpha_prod_t_prev = prev_timestep >= 0 ?
-                    alphas_cumprod[prev_timestep] : alphas_cumprod[0];
+                float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
                float beta_prod_t = 1 - alpha_prod_t;
                // 3. compute predicted original sample from predicted
                // noise also called "predicted x_0" of formula (12)
                // from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1145,7 @@ static void sample_k_diffusion(sample_method_t method,
                        vec_pred_original_sample[j] =
                            (vec_x[j] / std::sqrt(sigma * sigma + 1) -
                             std::sqrt(beta_prod_t) *
                                 vec_model_output[j]) *
                            (1 / std::sqrt(alpha_prod_t));
                    }
                }
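Stripped of the ggml plumbing, the loop above is DDIM's "predicted x_0" from formula (12) of arXiv:2010.02502. A hedged per-element helper (the function name is illustrative); x_scaled is the value the sampler actually stores, i.e. the sample pre-multiplied by sqrt(sigma^2 + 1):

    #include <cmath>

    // pred_x0 = (x_t - sqrt(1 - alpha_bar_t) * eps) / sqrt(alpha_bar_t),
    // where x_t is recovered from the stored x_scaled = x_t * sqrt(sigma^2 + 1).
    static float pred_original_sample(float x_scaled, float sigma,
                                      float alpha_prod_t, float eps) {
        float beta_prod_t = 1.0f - alpha_prod_t;
        float x_t         = x_scaled / std::sqrt(sigma * sigma + 1);  // undo the c_in-style scaling
        return (x_t - std::sqrt(beta_prod_t) * eps) * (1.0f / std::sqrt(alpha_prod_t));
    }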
@@ -1159,8 +1159,8 @@ static void sample_k_diffusion(sample_method_t method,
                // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
                // sqrt(1 - alpha_t/alpha_t-1)
                float beta_prod_t_prev = 1 - alpha_prod_t_prev;
                float variance = (beta_prod_t_prev / beta_prod_t) *
                                 (1 - alpha_prod_t / alpha_prod_t_prev);
                float std_dev_t = eta * std::sqrt(variance);
                // 6. compute "direction pointing to x_t" of formula
                // (12) from https://arxiv.org/pdf/2010.02502.pdf
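The same formula's noise level, factored into a small standalone helper for reference (illustrative name; eta = 0 gives deterministic DDIM, larger eta moves toward the ancestral update):

    #include <cmath>

    // sigma_t = eta * sqrt((1 - alpha_bar_{t-1}) / (1 - alpha_bar_t) * (1 - alpha_bar_t / alpha_bar_{t-1}))
    static float ddim_std_dev(float alpha_prod_t, float alpha_prod_t_prev, float eta) {
        float beta_prod_t      = 1.0f - alpha_prod_t;
        float beta_prod_t_prev = 1.0f - alpha_prod_t_prev;
        float variance         = (beta_prod_t_prev / beta_prod_t) *
                                 (1.0f - alpha_prod_t / alpha_prod_t_prev);
        return eta * std::sqrt(variance);
    }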
@@ -1179,8 +1179,8 @@ static void sample_k_diffusion(sample_method_t method,
                                             std::pow(std_dev_t, 2)) *
                                   vec_model_output[j];
                        vec_x[j] = std::sqrt(alpha_prod_t_prev) *
                                       vec_pred_original_sample[j] +
                                   pred_sample_direction;
                    }
                }
                if (eta > 0) {

@@ -1208,7 +1208,7 @@ static void sample_k_diffusion(sample_method_t method,
            // by Semi-Linear Consistency Function with Trajectory
            // Mapping", arXiv:2402.19159 [cs.CV]
            float beta_start = 0.00085f;
            float beta_end = 0.0120f;
            std::vector<double> alphas_cumprod;
            std::vector<double> compvis_sigmas;

@@ -1219,8 +1219,9 @@ static void sample_k_diffusion(sample_method_t method,
                    (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
                    (1.0f -
                     std::pow(sqrtf(beta_start) +
-                                 (sqrtf(beta_end) - sqrtf(beta_start)) *
-                                 ((float)i / (TIMESTEPS - 1)), 2));
+                                 (sqrtf(beta_end) - sqrtf(beta_start)) *
+                                     ((float)i / (TIMESTEPS - 1)),
+                             2));
                compvis_sigmas[i] =
                    std::sqrt((1 - alphas_cumprod[i]) /
                              alphas_cumprod[i]);
@@ -1235,13 +1236,10 @@ static void sample_k_diffusion(sample_method_t method,
            for (int i = 0; i < steps; i++) {
                // Analytic form for TCD timesteps
                int timestep = TIMESTEPS - 1 -
                               (TIMESTEPS / original_steps) *
                                   (int)floor(i * ((float)original_steps / steps));
                // 1. get previous step value
-                int prev_timestep = i >= steps - 1 ? 0 :
-                    TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
-                    (int)floor((i + 1) *
-                               ((float)original_steps / steps));
+                int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
                // Here timestep_s is tau_n' in Algorithm 4. The _s
                // notation appears to be that from C. Lu,
                // "DPM-Solver: A Fast ODE Solver for Diffusion
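A standalone sketch of the analytic TCD timestep schedule computed above; TIMESTEPS = 1000 and original_steps = 50 are assumptions used only to make it runnable:

    #include <cmath>
    #include <cstdio>

    int main() {
        const int TIMESTEPS      = 1000;  // assumed constant
        const int original_steps = 50;    // assumed training discretization
        const int steps          = 4;
        for (int i = 0; i < steps; i++) {
            // pick every (TIMESTEPS / original_steps)-th trailing timestep
            int timestep = TIMESTEPS - 1 -
                           (TIMESTEPS / original_steps) *
                               (int)std::floor(i * ((float)original_steps / steps));
            // previous value, clamped to 0 on the final step
            int prev_timestep = i >= steps - 1
                                    ? 0
                                    : TIMESTEPS - 1 -
                                          (TIMESTEPS / original_steps) *
                                              (int)std::floor((i + 1) * ((float)original_steps / steps));
            printf("i=%d timestep=%d prev=%d\n", i, timestep, prev_timestep);
        }
        return 0;
    }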
@@ -1258,10 +1256,9 @@ static void sample_k_diffusion(sample_method_t method,
                    float* vec_x = (float*)x->data;
                    for (int j = 0; j < ggml_nelements(x); j++) {
                        vec_x[j] *= std::sqrt(sigma * sigma + 1) /
                                    sigma;
                    }
-                }
-                else {
+                } else {
                    float* vec_x = (float*)x->data;
                    for (int j = 0; j < ggml_nelements(x); j++) {
                        vec_x[j] *= std::sqrt(sigma * sigma + 1);

@@ -1294,15 +1291,14 @@ static void sample_k_diffusion(sample_method_t method,
                // DPM-Solver. In fact, we have alpha_{t_n} =
                // \sqrt{\hat{alpha_n}}, [...]"
                float alpha_prod_t = alphas_cumprod[timestep];
                float beta_prod_t = 1 - alpha_prod_t;
                // Note final_alpha_cumprod = alphas_cumprod[0] since
                // TCD is always "trailing"
-                float alpha_prod_t_prev = prev_timestep >= 0 ?
-                    alphas_cumprod[prev_timestep] : alphas_cumprod[0];
+                float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
                // The subscript _s are the only portion in this
                // section (2) unique to TCD
                float alpha_prod_s = alphas_cumprod[timestep_s];
                float beta_prod_s = 1 - alpha_prod_s;
                // 3. Compute the predicted noised sample x_s based on
                // the model parameterization
                //

@@ -1317,7 +1313,7 @@ static void sample_k_diffusion(sample_method_t method,
                        vec_pred_original_sample[j] =
                            (vec_x[j] / std::sqrt(sigma * sigma + 1) -
                             std::sqrt(beta_prod_t) *
                                 vec_model_output[j]) *
                            (1 / std::sqrt(alpha_prod_t));
                    }
                }

@@ -1339,9 +1335,9 @@ static void sample_k_diffusion(sample_method_t method,
                        // pred_epsilon = model_output
                        vec_x[j] =
                            std::sqrt(alpha_prod_s) *
                                vec_pred_original_sample[j] +
                            std::sqrt(beta_prod_s) *
                                vec_model_output[j];
                    }
                }
                // 4. Sample and inject noise z ~ N(0, I) for

@@ -1357,7 +1353,7 @@ static void sample_k_diffusion(sample_method_t method,
                    // In this case, x is still pred_noised_sample,
                    // continue in-place
                    ggml_tensor_set_f32_randn(noise, rng);
                    float* vec_x = (float*)x->data;
                    float* vec_noise = (float*)noise->data;
                    for (int j = 0; j < ggml_nelements(x); j++) {
                        // Corresponding to (35) in Zheng et

@@ -1366,10 +1362,10 @@ static void sample_k_diffusion(sample_method_t method,
                        vec_x[j] =
                            std::sqrt(alpha_prod_t_prev /
                                      alpha_prod_s) *
                                vec_x[j] +
                            std::sqrt(1 - alpha_prod_t_prev /
                                              alpha_prod_s) *
                                vec_noise[j];
                    }
                }
            }
@@ -13,7 +13,7 @@ struct DiffusionModel {
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         std::vector<ggml_tensor*> ref_latents = {},
                         int num_video_frames = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength = 0.f,

@@ -69,7 +69,7 @@ struct UNetModel : public DiffusionModel {
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         std::vector<ggml_tensor*> ref_latents = {},
                         int num_video_frames = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength = 0.f,

@@ -120,7 +120,7 @@ struct MMDiTModel : public DiffusionModel {
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         std::vector<ggml_tensor*> ref_latents = {},
                         int num_video_frames = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength = 0.f,

@@ -173,7 +173,7 @@ struct FluxModel : public DiffusionModel {
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         std::vector<ggml_tensor*> ref_latents = {},
                         int num_video_frames = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength = 0.f,
@@ -133,9 +133,9 @@ struct SDParams {
    float skip_layer_start = 0.01f;
    float skip_layer_end = 0.2f;

    bool chroma_use_dit_mask = true;
    bool chroma_use_t5_mask = false;
    int chroma_t5_mask_pad = 1;
};

void print_params(SDParams params) {

@@ -919,7 +919,7 @@ int main(int argc, const char* argv[]) {
            input_image_buffer = resized_image_buffer;
        }
    } else if (params.mode == EDIT) {
        vae_decode_only = false;
        for (auto& path : params.ref_image_paths) {
            int c = 0;
            int width = 0;

@@ -1113,7 +1113,7 @@ int main(int argc, const char* argv[]) {
                          params.skip_layer_start,
                          params.skip_layer_end);
        }
    } else {  // EDIT
        results = edit(sd_ctx,
                       ref_images.data(),
                       ref_images.size(),

@@ -1176,11 +1176,11 @@ int main(int argc, const char* argv[]) {

    std::string dummy_name, ext, lc_ext;
    bool is_jpg;
    size_t last = params.output_path.find_last_of(".");
    size_t last_path = std::min(params.output_path.find_last_of("/"),
                                params.output_path.find_last_of("\\"));
    if (last != std::string::npos  // filename has extension
        && (last_path == std::string::npos || last > last_path)) {
        dummy_name = params.output_path.substr(0, last);
        ext = lc_ext = params.output_path.substr(last);
        std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);

@@ -1188,7 +1188,7 @@ int main(int argc, const char* argv[]) {
    } else {
        dummy_name = params.output_path;
        ext = lc_ext = "";
        is_jpg = false;
    }
    // appending ".png" to absent or unknown extension
    if (!is_jpg && lc_ext != ".png") {
@@ -1200,7 +1200,7 @@ int main(int argc, const char* argv[]) {
            continue;
        }
        std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
-        if(is_jpg) {
+        if (is_jpg) {
            stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
                           results[i].data, 90, get_image_params(params, params.seed + i).c_str());
            printf("save result JPEG image to '%s'\n", final_image_path.c_str());
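Taken together, the three hunks above format the output-path handling: split the path into stem and extension, lower-case the extension, fall back to ".png" when it is absent or unknown, and number extra results with an "_N" suffix. A hedged standalone sketch of that logic (final_path and the JPEG test are illustrative, not the CLI's exact helpers):

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>

    static std::string final_path(const std::string& output_path, int i) {
        std::string stem = output_path, ext, lc_ext;
        size_t last      = output_path.find_last_of(".");
        size_t last_path = std::min(output_path.find_last_of("/"),
                                    output_path.find_last_of("\\"));
        if (last != std::string::npos && (last_path == std::string::npos || last > last_path)) {
            stem = output_path.substr(0, last);
            ext  = lc_ext = output_path.substr(last);
            std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
        }
        bool is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg";  // assumption: same rule as the CLI
        if (!is_jpg && lc_ext != ".png") {
            ext = ".png";  // appending ".png" to absent or unknown extension
        }
        return i > 0 ? stem + "_" + std::to_string(i + 1) + ext : stem + ext;
    }

    int main() {
        std::cout << final_path("out/result.JPG", 0) << "\n";  // out/result.JPG
        std::cout << final_path("out/result", 2) << "\n";      // out/result_3.png
        return 0;
    }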
flux.hpp (39 changed lines)

@@ -512,7 +512,8 @@ namespace Flux {
        LastLayer(int64_t hidden_size,
                  int64_t patch_size,
                  int64_t out_channels,
-                 bool prune_mod = false) : prune_mod(prune_mod) {
+                 bool prune_mod = false)
+            : prune_mod(prune_mod) {
            blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
            blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
            if (!prune_mod) {

@@ -723,7 +724,7 @@ namespace Flux {
            auto txt_ids = gen_txt_ids(bs, context_len);
            auto img_ids = gen_img_ids(h, w, patch_size, bs);

            auto ids = concat_ids(txt_ids, img_ids, bs);
            uint64_t curr_h_offset = 0;
            uint64_t curr_w_offset = 0;
            for (ggml_tensor* ref : ref_latents) {

@@ -736,7 +737,7 @@ namespace Flux {
                }

                auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
                ids = concat_ids(ids, ref_ids, bs);

                curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
                curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
@@ -744,7 +745,6 @@ namespace Flux {
                return ids;
            }

-
            // Generate positional embeddings
            std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
                std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
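gen_ids above concatenates per-token position ids for the text tokens, the image patches, and each reference latent placed at an h/w offset. A simplified, hypothetical sketch of that layout, assuming one (index, y, x) triple per token; gen_img_ids_sketch and its parameters are illustrative stand-ins for the real helpers:

    #include <cstdio>
    #include <vector>

    // One (index, y, x) triple per patch token; index distinguishes the main image from ref latents.
    static std::vector<float> gen_img_ids_sketch(int h, int w, int patch_size, int index,
                                                 int h_offset, int w_offset) {
        int h_len = h / patch_size, w_len = w / patch_size;
        std::vector<float> ids;
        for (int y = 0; y < h_len; y++) {
            for (int x = 0; x < w_len; x++) {
                ids.push_back((float)index);           // 0 for the main image, 1 for ref latents (assumed)
                ids.push_back((float)(y + h_offset));  // row position
                ids.push_back((float)(x + w_offset));  // column position
            }
        }
        return ids;
    }

    int main() {
        std::vector<float> txt_ids(3 * 4, 0.0f);              // 4 text tokens -> all-zero triples
        auto img_ids = gen_img_ids_sketch(8, 8, 2, 0, 0, 0);  // 4x4 image patches
        auto ref_ids = gen_img_ids_sketch(8, 8, 2, 1, 4, 0);  // ref latent placed at an h offset
        printf("txt=%zu img=%zu ref=%zu floats\n", txt_ids.size(), img_ids.size(), ref_ids.size());
        return 0;
    }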
@@ -872,8 +872,8 @@ namespace Flux {
                                        struct ggml_tensor* y,
                                        struct ggml_tensor* guidance,
                                        struct ggml_tensor* pe,
                                        struct ggml_tensor* mod_index_arange = NULL,
                                        std::vector<int> skip_layers = {}) {
            auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
            auto txt_in = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
            auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]);

@@ -887,7 +887,7 @@ namespace Flux {
            auto distill_timestep = ggml_nn_timestep_embedding(ctx, timesteps, 16, 10000, 1000.f);
            auto distill_guidance = ggml_nn_timestep_embedding(ctx, guidance, 16, 10000, 1000.f);

            // auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
            // ggml_arange tot working on a lot of backends, precomputing it on CPU instead
            GGML_ASSERT(arange != NULL);
            auto modulation_index = ggml_nn_timestep_embedding(ctx, mod_index_arange, 32, 10000, 1000.f);  // [1, 344, 32]

@@ -962,7 +962,6 @@ namespace Flux {

        struct ggml_tensor* process_img(struct ggml_context* ctx,
                                        struct ggml_tensor* x) {
-
            int64_t W = x->ne[0];
            int64_t H = x->ne[1];
            int64_t patch_size = 2;

@@ -983,9 +982,9 @@ namespace Flux {
                                    struct ggml_tensor* y,
                                    struct ggml_tensor* guidance,
                                    struct ggml_tensor* pe,
                                    struct ggml_tensor* mod_index_arange = NULL,
                                    std::vector<ggml_tensor*> ref_latents = {},
                                    std::vector<int> skip_layers = {}) {
            // Forward pass of DiT.
            // x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
            // timestep: (N,) tensor of diffusion timesteps

@@ -1005,7 +1004,7 @@ namespace Flux {
            int pad_h = (patch_size - H % patch_size) % patch_size;
            int pad_w = (patch_size - W % patch_size) % patch_size;

            auto img = process_img(ctx, x);
            uint64_t img_tokens = img->ne[1];

            if (c_concat != NULL) {

@@ -1013,7 +1012,7 @@ namespace Flux {
                ggml_tensor* mask = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);

                masked = process_img(ctx, masked);
                mask = process_img(ctx, mask);

                img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
            }

@@ -1027,9 +1026,9 @@ namespace Flux {

            auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers);  // [N, num_tokens, C * patch_size * patch_size]
            if (out->ne[1] > img_tokens) {
                out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3));  // [num_tokens, N, C * patch_size * patch_size]
                out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
                out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3));  // [N, h*w, C * patch_size * patch_size]
            }

            // rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)

@@ -1120,7 +1119,7 @@ namespace Flux {
                                    struct ggml_tensor* y,
                                    struct ggml_tensor* guidance,
                                    std::vector<ggml_tensor*> ref_latents = {},
                                    std::vector<int> skip_layers = {}) {
            GGML_ASSERT(x->ne[3] == 1);
            struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);

@@ -1139,8 +1138,8 @@ namespace Flux {
            }

            // ggml_arange is not working on some backends, precompute it
            mod_index_arange_vec = arange(0, 344);
            mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
            set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
        }
        y = to_backend(y);

@@ -1187,9 +1186,9 @@ namespace Flux {
                     struct ggml_tensor* y,
                     struct ggml_tensor* guidance,
                     std::vector<ggml_tensor*> ref_latents = {},
                     struct ggml_tensor** output = NULL,
                     struct ggml_context* output_ctx = NULL,
                     std::vector<int> skip_layers = std::vector<int>()) {
            // x: [N, in_channels, h, w]
            // timesteps: [N, ]
            // context: [N, max_position, hidden_size]
lora.hpp (3 changed lines)

@@ -291,7 +291,6 @@ struct LoraModel : public GGMLRunner {
            std::string hada_2_down_name = "";
            std::string hada_2_up_name = "";

-
            hada_1_down_name = fk + ".hada_w1_b";
            hada_1_up_name = fk + ".hada_w1_a";
            hada_1_mid_name = fk + ".hada_t1";
@@ -414,7 +413,7 @@ struct LoraModel : public GGMLRunner {
                }
                lokr_w2 = ggml_merge_lora(compute_ctx, down, up);
            }

            // Technically it might be unused, but I believe it's the expected behavior
            applied_lora_tensors.insert(alpha_name);
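ggml_merge_lora above folds a down/up factor pair into a single weight delta. As a ggml-free reference, a hedged sketch of the conventional low-rank merge, delta_W = scale * up * down; the dimensions and the alpha/rank scaling are illustrative only and do not cover the LoHa/LoKr variants handled elsewhere in this file:

    #include <cstdio>
    #include <vector>

    // delta_W[out][in] = scale * sum_r up[out][r] * down[r][in]
    static std::vector<float> merge_lora(const std::vector<float>& down,  // [rank x in]
                                         const std::vector<float>& up,    // [out x rank]
                                         int rank, int in_dim, int out_dim, float scale) {
        std::vector<float> delta(out_dim * in_dim, 0.0f);
        for (int o = 0; o < out_dim; o++)
            for (int r = 0; r < rank; r++)
                for (int i = 0; i < in_dim; i++)
                    delta[o * in_dim + i] += scale * up[o * rank + r] * down[r * in_dim + i];
        return delta;
    }

    int main() {
        int rank = 2, in_dim = 4, out_dim = 3;
        std::vector<float> down(rank * in_dim, 0.5f), up(out_dim * rank, 1.0f);
        auto delta = merge_lora(down, up, rank, in_dim, out_dim, /*alpha/rank=*/0.5f);
        printf("delta[0] = %f (expect 0.5)\n", delta[0]);
        return 0;
    }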
model.h (2 changed lines)

@@ -12,9 +12,9 @@

#include "ggml-backend.h"
#include "ggml.h"
+#include "gguf.h"
#include "json.hpp"
#include "zip.h"
-#include "gguf.h"

#define SD_MAX_DIMS 5
@@ -48,8 +48,7 @@ const char* sampling_methods_str[] = {
    "iPNDM_v",
    "LCM",
    "DDIM \"trailing\"",
-    "TCD"
-};
+    "TCD"};

/*================================================== Helper Functions ================================================*/

@@ -696,7 +695,7 @@ public:
            float curr_multiplier = kv.second;
            lora_state_diff[lora_name] -= curr_multiplier;
        }

        size_t rm = lora_state_diff.size() - lora_state.size();
        if (rm != 0) {
            LOG_INFO("Attempting to apply %lu LoRAs (removing %lu applied LoRAs)", lora_state.size(), rm);

@@ -815,11 +814,11 @@ public:
                        int start_merge_step,
                        SDCondition id_cond,
                        std::vector<ggml_tensor*> ref_latents = {},
                        std::vector<int> skip_layers = {},
                        float slg_scale = 0,
                        float skip_layer_start = 0.01,
                        float skip_layer_end = 0.2,
                        ggml_tensor* noise_mask = nullptr) {
        LOG_DEBUG("Sample");
        struct ggml_init_params params;
        size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);

@@ -1973,7 +1972,6 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
    return result_images;
}

-
sd_image_t* edit(sd_ctx_t* sd_ctx,
                 sd_image_t* ref_images,
                 int ref_images_count,

@@ -2062,7 +2060,7 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
        }
        ref_latents.push_back(latent);
    }

    size_t t1 = ggml_time_ms();
    LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
@@ -61,10 +61,10 @@ enum schedule_t {

// same as enum ggml_type
enum sd_type_t {
    SD_TYPE_F32 = 0,
    SD_TYPE_F16 = 1,
    SD_TYPE_Q4_0 = 2,
    SD_TYPE_Q4_1 = 3,
    // SD_TYPE_Q4_2 = 4, support has been removed
    // SD_TYPE_Q4_3 = 5, support has been removed
    SD_TYPE_Q5_0 = 6,

@@ -95,12 +95,12 @@ enum sd_type_t {
    // SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
    // SD_TYPE_Q4_0_4_8 = 32,
    // SD_TYPE_Q4_0_8_8 = 33,
    SD_TYPE_TQ1_0 = 34,
    SD_TYPE_TQ2_0 = 35,
    // SD_TYPE_IQ4_NL_4_4 = 36,
    // SD_TYPE_IQ4_NL_4_8 = 37,
    // SD_TYPE_IQ4_NL_8_8 = 38,
    SD_TYPE_COUNT = 39,
};

SD_API const char* sd_type_name(enum sd_type_t type);
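Since the comment above states that sd_type_t mirrors enum ggml_type value for value, a consumer can in principle translate between the two with a plain cast. A hedged sketch (the helper name and the stable-diffusion.h include path are assumptions):

    #include "ggml.h"
    #include "stable-diffusion.h"  // assumed location of the sd_type_t declaration

    // Valid only as long as sd_type_t values stay in lockstep with ggml_type,
    // which is what the "same as enum ggml_type" comment promises.
    static enum ggml_type sd_type_to_ggml_type(enum sd_type_t type) {
        return (enum ggml_type)type;
    }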
t5.hpp (4 changed lines)

@@ -434,7 +434,7 @@ public:
            tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);
            weights.insert(weights.end(), length - weights.size(), 1.0);
            if (attention_mask != nullptr) {
                // maybe keep some padding tokens unmasked?
                attention_mask->insert(attention_mask->end(), length - attention_mask->size(), -HUGE_VALF);
            }
        }
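The padding logic above right-pads the token ids to a fixed length and, when a mask is requested, marks the padded positions with -HUGE_VALF so they drop out of attention additively. A ggml-free sketch of the same idea; pad_token_id, length, and the sample tokens are placeholders:

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        const int pad_token_id = 0;  // placeholder value
        const size_t length    = 8;  // target sequence length

        std::vector<int> tokens    = {101, 2023, 2003, 102};
        std::vector<float> weights = {1.0f, 1.0f, 1.0f, 1.0f};
        std::vector<float> attention_mask(tokens.size(), 0.0f);  // 0 = attend, -inf = masked out

        tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);
        weights.insert(weights.end(), length - weights.size(), 1.0f);
        attention_mask.insert(attention_mask.end(), length - attention_mask.size(), -HUGE_VALF);

        for (size_t i = 0; i < length; i++) {
            printf("%zu: token=%d mask=%f\n", i, tokens[i], attention_mask[i]);
        }
        return 0;
    }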
@@ -797,7 +797,7 @@ struct T5Runner : public GGMLRunner {
                 struct ggml_tensor* input_ids,
                 struct ggml_tensor* attention_mask,
                 ggml_tensor** output,
                 ggml_context* output_ctx = NULL) {
        auto get_graph = [&]() -> struct ggml_cgraph* {
            return build_graph(input_ids, attention_mask);
        };

util.cpp (4 changed lines)
@@ -112,7 +112,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
        sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());

        // Find the first file in the directory
        hFind = FindFirstFile(directoryPath, &findFileData);
        bool isAbsolutePath = false;
        // Check if the directory was found
        if (hFind == INVALID_HANDLE_VALUE) {
@@ -121,7 +121,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
            char directoryPathAbsolute[MAX_PATH];
            sprintf(directoryPathAbsolute, "%s*", dir.c_str());

            hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
            isAbsolutePath = true;
            if (hFind == INVALID_HANDLE_VALUE) {
                printf("Absolute path was also wrong.\n");
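Both util.cpp hunks touch the Win32 directory scan. For reference, a hedged standalone sketch of the same FindFirstFile / FindNextFile / FindClose loop (Windows-only; pattern building and error handling simplified):

    #include <windows.h>
    #include <cstdio>
    #include <string>
    #include <vector>

    std::vector<std::string> list_files(const std::string& dir) {
        std::vector<std::string> files;
        WIN32_FIND_DATAA findFileData;
        std::string pattern = dir + "\\*";
        HANDLE hFind = FindFirstFileA(pattern.c_str(), &findFileData);
        if (hFind == INVALID_HANDLE_VALUE) {
            printf("directory not found: %s\n", dir.c_str());
            return files;
        }
        do {
            // skip subdirectories, collect plain files
            if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
                files.push_back(dir + "\\" + findFileData.cFileName);
            }
        } while (FindNextFileA(hFind, &findFileData));
        FindClose(hFind);
        return files;
    }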