From 45d0ebb30c126013ce5a34fc8756eedd8197d71a Mon Sep 17 00:00:00 2001
From: leejet
Date: Sun, 29 Jun 2025 23:40:55 +0800
Subject: [PATCH] style: format code

---
 conditioner.hpp       |  7 +++--
 denoiser.hpp          | 72 ++++++++++++++++++++-----------------------
 diffusion_model.hpp   |  8 ++---
 examples/cli/main.cpp | 20 ++++++------
 flux.hpp              | 39 ++++++++++++-----------
 lora.hpp              |  3 +-
 model.h               |  2 +-
 stable-diffusion.cpp  | 18 +++++------
 stable-diffusion.h    | 14 ++++-----
 t5.hpp                |  4 +--
 util.cpp              |  4 +--
 11 files changed, 92 insertions(+), 99 deletions(-)

diff --git a/conditioner.hpp b/conditioner.hpp
index f06f1e2..4005fad 100644
--- a/conditioner.hpp
+++ b/conditioner.hpp
@@ -1224,14 +1224,15 @@ struct PixArtCLIPEmbedder : public Conditioner {
     T5UniGramTokenizer t5_tokenizer;
     std::shared_ptr<T5Runner> t5;
     size_t chunk_len = 512;
-    bool use_mask = false;
-    int mask_pad = 1;
+    bool use_mask    = false;
+    int mask_pad     = 1;
 
     PixArtCLIPEmbedder(ggml_backend_t backend,
                        std::map<std::string, enum ggml_type>& tensor_types,
                        int clip_skip = -1,
                        bool use_mask = false,
-                       int mask_pad = 1) : use_mask(use_mask), mask_pad(mask_pad) {
+                       int mask_pad  = 1)
+        : use_mask(use_mask), mask_pad(mask_pad) {
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
     }
 
diff --git a/denoiser.hpp b/denoiser.hpp
index 6679910..5dec109 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -1019,7 +1019,7 @@ static void sample_k_diffusion(sample_method_t method,
             // also needed to invert the behavior of CompVisDenoiser
             // (k-diffusion's LMSDiscreteScheduler)
             float beta_start = 0.00085f;
-            float beta_end = 0.0120f;
+            float beta_end   = 0.0120f;
             std::vector<double> alphas_cumprod;
             std::vector<double> compvis_sigmas;
 
@@ -1030,8 +1030,9 @@ static void sample_k_diffusion(sample_method_t method,
                     (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
                     (1.0f -
                      std::pow(sqrtf(beta_start) +
-                              (sqrtf(beta_end) - sqrtf(beta_start)) *
-                              ((float)i / (TIMESTEPS - 1)), 2));
+                                  (sqrtf(beta_end) - sqrtf(beta_start)) *
+                                      ((float)i / (TIMESTEPS - 1)),
+                              2));
                 compvis_sigmas[i] =
                     std::sqrt((1 - alphas_cumprod[i]) /
                               alphas_cumprod[i]);
@@ -1061,7 +1062,8 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // - pred_prev_sample -> "x_t-1"
                 int timestep = roundf(TIMESTEPS -
-                    i * ((float)TIMESTEPS / steps)) - 1;
+                                      i * ((float)TIMESTEPS / steps)) -
+                               1;
                 // 1. get previous step value (=t-1)
                 int prev_timestep = timestep - TIMESTEPS / steps;
                 // The sigma here is chosen to cause the
@@ -1086,10 +1088,9 @@ static void sample_k_diffusion(sample_method_t method,
                     float* vec_x = (float*)x->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         vec_x[j] *= std::sqrt(sigma * sigma + 1) /
-                            sigma;
+                                    sigma;
                     }
-                }
-                else {
+                } else {
                     // For the subsequent steps after the first one,
                     // at this point x = latents or x = sample, and
                     // needs to be prescaled with x <- sample / c_in
@@ -1127,9 +1128,8 @@ static void sample_k_diffusion(sample_method_t method,
                 float alpha_prod_t = alphas_cumprod[timestep];
                 // Note final_alpha_cumprod = alphas_cumprod[0] due to
                 // trailing timestep spacing
-                float alpha_prod_t_prev = prev_timestep >= 0 ?
-                    alphas_cumprod[prev_timestep] : alphas_cumprod[0];
-                float beta_prod_t = 1 - alpha_prod_t;
+                float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
+                float beta_prod_t       = 1 - alpha_prod_t;
                 // 3. compute predicted original sample from predicted
                 // noise also called "predicted x_0" of formula (12)
                 // from https://arxiv.org/pdf/2010.02502.pdf
@@ -1145,7 +1145,7 @@ static void sample_k_diffusion(sample_method_t method,
                     vec_pred_original_sample[j] =
                         (vec_x[j] / std::sqrt(sigma * sigma + 1) -
                          std::sqrt(beta_prod_t) *
-                         vec_model_output[j]) *
+                             vec_model_output[j]) *
                         (1 / std::sqrt(alpha_prod_t));
                 }
             }
@@ -1159,8 +1159,8 @@ static void sample_k_diffusion(sample_method_t method,
                 // sigma_t = sqrt((1 - alpha_t-1)/(1 - alpha_t)) *
                 // sqrt(1 - alpha_t/alpha_t-1)
                 float beta_prod_t_prev = 1 - alpha_prod_t_prev;
-                float variance = (beta_prod_t_prev / beta_prod_t) *
-                    (1 - alpha_prod_t / alpha_prod_t_prev);
+                float variance         = (beta_prod_t_prev / beta_prod_t) *
+                                         (1 - alpha_prod_t / alpha_prod_t_prev);
                 float std_dev_t = eta * std::sqrt(variance);
                 // 6. compute "direction pointing to x_t" of formula
                 // (12) from https://arxiv.org/pdf/2010.02502.pdf
@@ -1179,8 +1179,8 @@ static void sample_k_diffusion(sample_method_t method,
                             std::pow(std_dev_t, 2)) *
                         vec_model_output[j];
                     vec_x[j] = std::sqrt(alpha_prod_t_prev) *
-                        vec_pred_original_sample[j] +
-                        pred_sample_direction;
+                                   vec_pred_original_sample[j] +
+                               pred_sample_direction;
                 }
             }
             if (eta > 0) {
@@ -1208,7 +1208,7 @@ static void sample_k_diffusion(sample_method_t method,
             // by Semi-Linear Consistency Function with Trajectory
             // Mapping", arXiv:2402.19159 [cs.CV]
             float beta_start = 0.00085f;
-            float beta_end = 0.0120f;
+            float beta_end   = 0.0120f;
             std::vector<double> alphas_cumprod;
             std::vector<double> compvis_sigmas;
 
@@ -1219,8 +1219,9 @@ static void sample_k_diffusion(sample_method_t method,
                     (i == 0 ? 1.0f : alphas_cumprod[i - 1]) *
                     (1.0f -
                      std::pow(sqrtf(beta_start) +
-                              (sqrtf(beta_end) - sqrtf(beta_start)) *
-                              ((float)i / (TIMESTEPS - 1)), 2));
+                                  (sqrtf(beta_end) - sqrtf(beta_start)) *
+                                      ((float)i / (TIMESTEPS - 1)),
+                              2));
                 compvis_sigmas[i] =
                     std::sqrt((1 - alphas_cumprod[i]) /
                               alphas_cumprod[i]);
@@ -1235,13 +1236,10 @@ static void sample_k_diffusion(sample_method_t method,
             for (int i = 0; i < steps; i++) {
                 // Analytic form for TCD timesteps
                 int timestep = TIMESTEPS - 1 -
-                    (TIMESTEPS / original_steps) *
-                    (int)floor(i * ((float)original_steps / steps));
+                               (TIMESTEPS / original_steps) *
+                                   (int)floor(i * ((float)original_steps / steps));
                 // 1. get previous step value
-                int prev_timestep = i >= steps - 1 ? 0 :
-                    TIMESTEPS - 1 - (TIMESTEPS / original_steps) *
-                    (int)floor((i + 1) *
-                    ((float)original_steps / steps));
+                int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps));
                 // Here timestep_s is tau_n' in Algorithm 4. The _s
                 // notation appears to be that from C. Lu,
                 // "DPM-Solver: A Fast ODE Solver for Diffusion
@@ -1258,10 +1256,9 @@ static void sample_k_diffusion(sample_method_t method,
                     float* vec_x = (float*)x->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         vec_x[j] *= std::sqrt(sigma * sigma + 1) /
-                            sigma;
+                                    sigma;
                     }
-                }
-                else {
+                } else {
                     float* vec_x = (float*)x->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         vec_x[j] *= std::sqrt(sigma * sigma + 1);
@@ -1294,15 +1291,14 @@ static void sample_k_diffusion(sample_method_t method,
                 // DPM-Solver. In fact, we have alpha_{t_n} =
                 // \sqrt{\hat{alpha_n}}, [...]"
                 float alpha_prod_t = alphas_cumprod[timestep];
-                float beta_prod_t = 1 - alpha_prod_t;
+                float beta_prod_t  = 1 - alpha_prod_t;
                 // Note final_alpha_cumprod = alphas_cumprod[0] since
                 // TCD is always "trailing"
-                float alpha_prod_t_prev = prev_timestep >= 0 ?
-                    alphas_cumprod[prev_timestep] : alphas_cumprod[0];
+                float alpha_prod_t_prev = prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0];
                 // The subscript _s are the only portion in this
                 // section (2) unique to TCD
                 float alpha_prod_s = alphas_cumprod[timestep_s];
-                float beta_prod_s = 1 - alpha_prod_s;
+                float beta_prod_s  = 1 - alpha_prod_s;
                 // 3. Compute the predicted noised sample x_s based on
                 // the model parameterization
                 //
@@ -1317,7 +1313,7 @@ static void sample_k_diffusion(sample_method_t method,
                     vec_pred_original_sample[j] =
                         (vec_x[j] / std::sqrt(sigma * sigma + 1) -
                          std::sqrt(beta_prod_t) *
-                         vec_model_output[j]) *
+                             vec_model_output[j]) *
                         (1 / std::sqrt(alpha_prod_t));
                 }
             }
@@ -1339,9 +1335,9 @@ static void sample_k_diffusion(sample_method_t method,
                     // pred_epsilon = model_output
                     vec_x[j] =
                         std::sqrt(alpha_prod_s) *
-                        vec_pred_original_sample[j] +
+                            vec_pred_original_sample[j] +
                         std::sqrt(beta_prod_s) *
-                        vec_model_output[j];
+                            vec_model_output[j];
                 }
             }
             // 4. Sample and inject noise z ~ N(0, I) for
@@ -1357,7 +1353,7 @@ static void sample_k_diffusion(sample_method_t method,
                     // In this case, x is still pred_noised_sample,
                     // continue in-place
                     ggml_tensor_set_f32_randn(noise, rng);
-                    float* vec_x = (float*)x->data;
+                    float* vec_x     = (float*)x->data;
                     float* vec_noise = (float*)noise->data;
                     for (int j = 0; j < ggml_nelements(x); j++) {
                         // Corresponding to (35) in Zheng et
@@ -1366,10 +1362,10 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_x[j] =
                             std::sqrt(alpha_prod_t_prev /
                                       alpha_prod_s) *
-                            vec_x[j] +
+                                vec_x[j] +
                             std::sqrt(1 - alpha_prod_t_prev /
-                                alpha_prod_s) *
-                            vec_noise[j];
+                                              alpha_prod_s) *
+                                vec_noise[j];
                     }
                 }
             }
diff --git a/diffusion_model.hpp b/diffusion_model.hpp
index 0f84377..5c34943 100644
--- a/diffusion_model.hpp
+++ b/diffusion_model.hpp
@@ -13,7 +13,7 @@ struct DiffusionModel {
                          struct ggml_tensor* c_concat,
                          struct ggml_tensor* y,
                          struct ggml_tensor* guidance,
-                         std::vector<ggml_tensor*> ref_latents = {},
+                         std::vector<ggml_tensor*> ref_latents     = {},
                          int num_video_frames                      = -1,
                          std::vector<struct ggml_tensor*> controls = {},
                          float control_strength                    = 0.f,
@@ -69,7 +69,7 @@ struct UNetModel : public DiffusionModel {
                          struct ggml_tensor* c_concat,
                          struct ggml_tensor* y,
                          struct ggml_tensor* guidance,
-                         std::vector<ggml_tensor*> ref_latents = {},
+                         std::vector<ggml_tensor*> ref_latents     = {},
                          int num_video_frames                      = -1,
                          std::vector<struct ggml_tensor*> controls = {},
                          float control_strength                    = 0.f,
@@ -120,7 +120,7 @@ struct MMDiTModel : public DiffusionModel {
                          struct ggml_tensor* c_concat,
                          struct ggml_tensor* y,
                          struct ggml_tensor* guidance,
-                         std::vector<ggml_tensor*> ref_latents = {},
+                         std::vector<ggml_tensor*> ref_latents     = {},
                          int num_video_frames                      = -1,
                          std::vector<struct ggml_tensor*> controls = {},
                          float control_strength                    = 0.f,
@@ -173,7 +173,7 @@ struct FluxModel : public DiffusionModel {
                          struct ggml_tensor* c_concat,
                          struct ggml_tensor* y,
                          struct ggml_tensor* guidance,
-                         std::vector<ggml_tensor*> ref_latents = {},
+                         std::vector<ggml_tensor*> ref_latents     = {},
                          int num_video_frames                      = -1,
                          std::vector<struct ggml_tensor*> controls = {},
                          float control_strength                    = 0.f,
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index 0583ff0..8f68f53 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -133,9 +133,9 @@ struct SDParams {
     float skip_layer_start = 0.01f;
     float skip_layer_end   = 0.2f;
 
-    bool chroma_use_dit_mask = true;
-    bool chroma_use_t5_mask = false;
-    int chroma_t5_mask_pad = 1;
+    bool chroma_use_dit_mask = true;
+    bool chroma_use_t5_mask  = false;
+    int chroma_t5_mask_pad   = 1;
 };
 
 void print_params(SDParams params) {
@@ -919,7 +919,7 @@ int main(int argc, const char* argv[]) {
             input_image_buffer = resized_image_buffer;
         }
     } else if (params.mode == EDIT) {
-        vae_decode_only    = false;
+        vae_decode_only = false;
         for (auto& path : params.ref_image_paths) {
             int c     = 0;
             int width = 0;
@@ -1113,7 +1113,7 @@ int main(int argc, const char* argv[]) {
                               params.skip_layer_start,
                               params.skip_layer_end);
         }
-    } else { // EDIT
+    } else {  // EDIT
         results = edit(sd_ctx,
                        ref_images.data(),
                        ref_images.size(),
@@ -1176,11 +1176,11 @@ int main(int argc, const char* argv[]) {
 
     std::string dummy_name, ext, lc_ext;
     bool is_jpg;
-    size_t last = params.output_path.find_last_of(".");
+    size_t last      = params.output_path.find_last_of(".");
     size_t last_path = std::min(params.output_path.find_last_of("/"),
                                 params.output_path.find_last_of("\\"));
-    if (last != std::string::npos // filename has extension
-        && (last_path == std::string::npos || last > last_path)) {
+    if (last != std::string::npos  // filename has extension
+        && (last_path == std::string::npos || last > last_path)) {
         dummy_name = params.output_path.substr(0, last);
         ext = lc_ext = params.output_path.substr(last);
         std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
@@ -1188,7 +1188,7 @@ int main(int argc, const char* argv[]) {
     } else {
         dummy_name = params.output_path;
         ext = lc_ext = "";
-        is_jpg = false;
+        is_jpg     = false;
     }
     // appending ".png" to absent or unknown extension
     if (!is_jpg && lc_ext != ".png") {
@@ -1200,7 +1200,7 @@ int main(int argc, const char* argv[]) {
             continue;
         }
         std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
-        if(is_jpg) {
+        if (is_jpg) {
             stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
                            results[i].data, 90, get_image_params(params, params.seed + i).c_str());
             printf("save result JPEG image to '%s'\n", final_image_path.c_str());
diff --git a/flux.hpp b/flux.hpp
index a1e88d5..1104591 100644
--- a/flux.hpp
+++ b/flux.hpp
@@ -512,7 +512,8 @@ namespace Flux {
         LastLayer(int64_t hidden_size,
                   int64_t patch_size,
                   int64_t out_channels,
-                  bool prune_mod = false) : prune_mod(prune_mod) {
+                  bool prune_mod = false)
+            : prune_mod(prune_mod) {
             blocks["norm_final"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size, 1e-06f, false));
             blocks["linear"]     = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, patch_size * patch_size * out_channels));
             if (!prune_mod) {
@@ -723,7 +724,7 @@ namespace Flux {
            auto txt_ids = gen_txt_ids(bs, context_len);
            auto img_ids = gen_img_ids(h, w, patch_size, bs);
 
-            auto ids = concat_ids(txt_ids, img_ids, bs);
+            auto ids               = concat_ids(txt_ids, img_ids, bs);
             uint64_t curr_h_offset = 0;
             uint64_t curr_w_offset = 0;
             for (ggml_tensor* ref : ref_latents) {
@@ -736,7 +737,7 @@ namespace Flux {
                 }
 
                 auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, 1, h_offset, w_offset);
-                ids = concat_ids(ids, ref_ids, bs);
+                ids          = concat_ids(ids, ref_ids, bs);
 
                 curr_h_offset = std::max(curr_h_offset, ref->ne[1] + h_offset);
                 curr_w_offset = std::max(curr_w_offset, ref->ne[0] + w_offset);
@@ -744,7 +745,6 @@ namespace Flux {
             return ids;
         }
 
-        // Generate positional embeddings
         std::vector<float> gen_pe(int h, int w, int patch_size, int bs, int context_len, std::vector<ggml_tensor*> ref_latents, int theta, const std::vector<int>& axes_dim) {
             std::vector<std::vector<float>> ids = gen_ids(h, w, patch_size, bs, context_len, ref_latents);
 
@@ -872,8 +872,8 @@ namespace Flux {
                                         struct ggml_tensor* y,
                                         struct ggml_tensor* guidance,
                                         struct ggml_tensor* pe,
-                                        struct ggml_tensor* mod_index_arange = NULL,
-                                        std::vector<int> skip_layers = {}) {
+                                        struct ggml_tensor* mod_index_arange = NULL,
+                                        std::vector<int> skip_layers         = {}) {
             auto img_in      = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
             auto txt_in      = std::dynamic_pointer_cast<Linear>(blocks["txt_in"]);
             auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks["final_layer"]);
@@ -887,7 +887,7 @@ namespace Flux {
                 auto distill_timestep = ggml_nn_timestep_embedding(ctx, timesteps, 16, 10000, 1000.f);
                 auto distill_guidance = ggml_nn_timestep_embedding(ctx, guidance, 16, 10000, 1000.f);
 
-                // auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
+                // auto mod_index_arange  = ggml_arange(ctx, 0, (float)mod_index_length, 1);
                 // ggml_arange tot working on a lot of backends, precomputing it on CPU instead
                 GGML_ASSERT(arange != NULL);
                 auto modulation_index = ggml_nn_timestep_embedding(ctx, mod_index_arange, 32, 10000, 1000.f);  // [1, 344, 32]
@@ -962,7 +962,6 @@ namespace Flux {
 
         struct ggml_tensor* process_img(struct ggml_context* ctx,
                                         struct ggml_tensor* x) {
-
             int64_t W = x->ne[0];
             int64_t H = x->ne[1];
             int64_t patch_size = 2;
@@ -983,9 +982,9 @@ namespace Flux {
                                     struct ggml_tensor* y,
                                     struct ggml_tensor* guidance,
                                     struct ggml_tensor* pe,
-                                    struct ggml_tensor* mod_index_arange = NULL,
+                                    struct ggml_tensor* mod_index_arange  = NULL,
                                     std::vector<ggml_tensor*> ref_latents = {},
-                                    std::vector<int> skip_layers = {}) {
+                                    std::vector<int> skip_layers          = {}) {
             // Forward pass of DiT.
             // x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
             // timestep: (N,) tensor of diffusion timesteps
@@ -1005,7 +1004,7 @@ namespace Flux {
             int pad_h = (patch_size - H % patch_size) % patch_size;
             int pad_w = (patch_size - W % patch_size) % patch_size;
 
-            auto img = process_img(ctx, x);
+            auto img             = process_img(ctx, x);
             uint64_t img_tokens  = img->ne[1];
 
             if (c_concat != NULL) {
@@ -1013,7 +1012,7 @@ namespace Flux {
                 ggml_tensor* mask   = ggml_view_4d(ctx, c_concat, c_concat->ne[0], c_concat->ne[1], 8 * 8, 1, c_concat->nb[1], c_concat->nb[2], c_concat->nb[3], c_concat->nb[2] * C);
 
                 masked = process_img(ctx, masked);
-                mask = process_img(ctx, mask);
+                mask   = process_img(ctx, mask);
 
                 img = ggml_concat(ctx, img, ggml_concat(ctx, masked, mask, 0), 0);
             }
@@ -1027,9 +1026,9 @@ namespace Flux {
             auto out = forward_orig(ctx, img, context, timestep, y, guidance, pe, mod_index_arange, skip_layers);  // [N, num_tokens, C * patch_size * patch_size]
 
             if (out->ne[1] > img_tokens) {
-                out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
+                out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3));  // [num_tokens, N, C * patch_size * patch_size]
                 out = ggml_view_3d(ctx, out, out->ne[0], out->ne[1], img_tokens, out->nb[1], out->nb[2], 0);
-                out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
+                out = ggml_cont(ctx, ggml_permute(ctx, out, 0, 2, 1, 3));  // [N, h*w, C * patch_size * patch_size]
             }
 
             // rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)
@@ -1120,7 +1119,7 @@ namespace Flux {
                                         struct ggml_tensor* y,
                                         struct ggml_tensor* guidance,
                                         std::vector<ggml_tensor*> ref_latents = {},
-                                        std::vector<int> skip_layers = {}) {
+                                        std::vector<int> skip_layers          = {}) {
             GGML_ASSERT(x->ne[3] == 1);
 
             struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, FLUX_GRAPH_SIZE, false);
@@ -1139,8 +1138,8 @@ namespace Flux {
                 }
 
                 // ggml_arange is not working on some backends, precompute it
-                mod_index_arange_vec = arange(0, 344);
-                mod_index_arange = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
+                mod_index_arange_vec = arange(0, 344);
+                mod_index_arange     = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, mod_index_arange_vec.size());
                 set_backend_tensor_data(mod_index_arange, mod_index_arange_vec.data());
             }
             y = to_backend(y);
@@ -1187,9 +1186,9 @@ namespace Flux {
                      struct ggml_tensor* y,
                      struct ggml_tensor* guidance,
                      std::vector<ggml_tensor*> ref_latents = {},
-                     struct ggml_tensor** output = NULL,
-                     struct ggml_context* output_ctx = NULL,
-                     std::vector<int> skip_layers = std::vector<int>()) {
+                     struct ggml_tensor** output           = NULL,
+                     struct ggml_context* output_ctx       = NULL,
+                     std::vector<int> skip_layers          = std::vector<int>()) {
             // x: [N, in_channels, h, w]
             // timesteps: [N, ]
             // context: [N, max_position, hidden_size]
diff --git a/lora.hpp b/lora.hpp
index d38c711..ee14bce 100644
--- a/lora.hpp
+++ b/lora.hpp
@@ -291,7 +291,6 @@ struct LoraModel : public GGMLRunner {
                 std::string hada_2_down_name = "";
                 std::string hada_2_up_name   = "";
 
-
                 hada_1_down_name = fk + ".hada_w1_b";
                 hada_1_up_name   = fk + ".hada_w1_a";
                 hada_1_mid_name  = fk + ".hada_t1";
@@ -414,7 +413,7 @@ struct LoraModel : public GGMLRunner {
                     }
                     lokr_w2 = ggml_merge_lora(compute_ctx, down, up);
                 }
-                
+
                 // Technically it might be unused, but I believe it's the expected behavior
                 applied_lora_tensors.insert(alpha_name);
 
diff --git a/model.h b/model.h
index d7f9765..79c2533 100644
--- a/model.h
+++ b/model.h
@@ -12,9 +12,9 @@
 
 #include "ggml-backend.h"
 #include "ggml.h"
+#include "gguf.h"
 #include "json.hpp"
 #include "zip.h"
-#include "gguf.h"
 
 #define SD_MAX_DIMS 5
 
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index cffef32..5522287 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -48,8 +48,7 @@ const char* sampling_methods_str[] = {
     "iPNDM_v",
     "LCM",
     "DDIM \"trailing\"",
-    "TCD"
-};
+    "TCD"};
 
 /*================================================== Helper Functions ================================================*/
 
@@ -696,7 +695,7 @@ public:
             float curr_multiplier = kv.second;
             lora_state_diff[lora_name] -= curr_multiplier;
         }
-        
+
         size_t rm = lora_state_diff.size() - lora_state.size();
         if (rm != 0) {
             LOG_INFO("Attempting to apply %lu LoRAs (removing %lu applied LoRAs)", lora_state.size(), rm);
@@ -815,11 +814,11 @@ public:
                         int start_merge_step,
                         SDCondition id_cond,
                         std::vector<ggml_tensor*> ref_latents = {},
-                        std::vector<int> skip_layers = {},
-                        float slg_scale = 0,
-                        float skip_layer_start = 0.01,
-                        float skip_layer_end = 0.2,
-                        ggml_tensor* noise_mask = nullptr) {
+                        std::vector<int> skip_layers          = {},
+                        float slg_scale                       = 0,
+                        float skip_layer_start                = 0.01,
+                        float skip_layer_end                  = 0.2,
+                        ggml_tensor* noise_mask               = nullptr) {
         LOG_DEBUG("Sample");
         struct ggml_init_params params;
         size_t data_size = ggml_row_size(init_latent->type, init_latent->ne[0]);
@@ -1973,7 +1972,6 @@ SD_API sd_image_t* img2vid(sd_ctx_t* sd_ctx,
     return result_images;
 }
 
-
 sd_image_t* edit(sd_ctx_t* sd_ctx,
                  sd_image_t* ref_images,
                  int ref_images_count,
@@ -2062,7 +2060,7 @@ sd_image_t* edit(sd_ctx_t* sd_ctx,
         }
         ref_latents.push_back(latent);
     }
-    
+
     size_t t1 = ggml_time_ms();
     LOG_INFO("encode_first_stage completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);
 
diff --git a/stable-diffusion.h b/stable-diffusion.h
index 5dc5a18..b4d6fc3 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -61,10 +61,10 @@ enum schedule_t {
 
 // same as enum ggml_type
 enum sd_type_t {
-    SD_TYPE_F32 = 0,
-    SD_TYPE_F16 = 1,
-    SD_TYPE_Q4_0 = 2,
-    SD_TYPE_Q4_1 = 3,
+    SD_TYPE_F32  = 0,
+    SD_TYPE_F16  = 1,
+    SD_TYPE_Q4_0 = 2,
+    SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
     // SD_TYPE_Q4_3 = 5, support has been removed
     SD_TYPE_Q5_0 = 6,
@@ -95,12 +95,12 @@ enum sd_type_t {
     // SD_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
     // SD_TYPE_Q4_0_4_8 = 32,
     // SD_TYPE_Q4_0_8_8 = 33,
-    SD_TYPE_TQ1_0 = 34,
-    SD_TYPE_TQ2_0 = 35,
+    SD_TYPE_TQ1_0   = 34,
+    SD_TYPE_TQ2_0   = 35,
     // SD_TYPE_IQ4_NL_4_4 = 36,
     // SD_TYPE_IQ4_NL_4_8 = 37,
     // SD_TYPE_IQ4_NL_8_8 = 38,
-    SD_TYPE_COUNT = 39,
+    SD_TYPE_COUNT   = 39,
 };
 
 SD_API const char* sd_type_name(enum sd_type_t type);
 
diff --git a/t5.hpp b/t5.hpp
index be88007..d511ef2 100644
--- a/t5.hpp
+++ b/t5.hpp
@@ -434,7 +434,7 @@ public:
             tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);
             weights.insert(weights.end(), length - weights.size(), 1.0);
             if (attention_mask != nullptr) {
-            // maybe keep some padding tokens unmasked?
+                // maybe keep some padding tokens unmasked?
                 attention_mask->insert(attention_mask->end(), length - attention_mask->size(), -HUGE_VALF);
             }
         }
@@ -797,7 +797,7 @@ struct T5Runner : public GGMLRunner {
                  struct ggml_tensor* input_ids,
                  struct ggml_tensor* attention_mask,
                  ggml_tensor** output,
-                 ggml_context* output_ctx  = NULL) {
+                 ggml_context* output_ctx = NULL) {
         auto get_graph = [&]() -> struct ggml_cgraph* {
             return build_graph(input_ids, attention_mask);
         };
diff --git a/util.cpp b/util.cpp
index da11a14..631c120 100644
--- a/util.cpp
+++ b/util.cpp
@@ -112,7 +112,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
     sprintf(directoryPath, "%s\\%s\\*", currentDirectory, dir.c_str());
 
     // Find the first file in the directory
-    hFind = FindFirstFile(directoryPath, &findFileData);
+    hFind               = FindFirstFile(directoryPath, &findFileData);
     bool isAbsolutePath = false;
     // Check if the directory was found
     if (hFind == INVALID_HANDLE_VALUE) {
@@ -121,7 +121,7 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
         char directoryPathAbsolute[MAX_PATH];
         sprintf(directoryPathAbsolute, "%s*", dir.c_str());
 
-        hFind = FindFirstFile(directoryPathAbsolute, &findFileData);
+        hFind          = FindFirstFile(directoryPathAbsolute, &findFileData);
         isAbsolutePath = true;
         if (hFind == INVALID_HANDLE_VALUE) {
             printf("Absolute path was also wrong.\n");