From 28ef93c0e172a29b0dcbd131ec8e34ecbd2b98e2 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Tue, 10 Feb 2026 23:13:35 +0800
Subject: [PATCH] refactor: reorganize the file structure (#1266)

---
 CMakeLists.txt                                |  11 +-
 .../stable-diffusion.h                        |   0
 face_detect.py => script/face_detect.py       | 174 +++----
 cache_dit.hpp => src/cache_dit.hpp            |   0
 clip.hpp => src/clip.hpp                      |   0
 common.hpp => src/common.hpp                  |   0
 conditioner.hpp => src/conditioner.hpp        |   0
 control.hpp => src/control.hpp                |   0
 denoiser.hpp => src/denoiser.hpp              |   0
 .../diffusion_model.hpp                       |   0
 easycache.hpp => src/easycache.hpp            |   0
 esrgan.hpp => src/esrgan.hpp                  |   0
 flux.hpp => src/flux.hpp                      |   0
 ggml_extend.hpp => src/ggml_extend.hpp        |   0
 gguf_reader.hpp => src/gguf_reader.hpp        |   0
 gits_noise.inl => src/gits_noise.inl          |   0
 latent-preview.h => src/latent-preview.h      | 468 +++++++++---------
 llm.hpp => src/llm.hpp                        |   0
 lora.hpp => src/lora.hpp                      |   0
 ltxv.hpp => src/ltxv.hpp                      |   0
 mmdit.hpp => src/mmdit.hpp                    |   0
 model.cpp => src/model.cpp                    |   0
 model.h => src/model.h                        |   0
 .../name_conversion.cpp                       |   0
 name_conversion.h => src/name_conversion.h    |   0
 ordered_map.hpp => src/ordered_map.hpp        |   0
 pmid.hpp => src/pmid.hpp                      |   0
 preprocessing.hpp => src/preprocessing.hpp    |   0
 qwen_image.hpp => src/qwen_image.hpp          |   0
 rng.hpp => src/rng.hpp                        |   0
 rng_mt19937.hpp => src/rng_mt19937.hpp        |   0
 rng_philox.hpp => src/rng_philox.hpp          |   0
 rope.hpp => src/rope.hpp                      |   0
 .../stable-diffusion.cpp                      |   0
 t5.hpp => src/t5.hpp                          |   0
 tae.hpp => src/tae.hpp                        |   0
 tokenize_util.cpp => src/tokenize_util.cpp    |   0
 tokenize_util.h => src/tokenize_util.h        |   0
 ucache.hpp => src/ucache.hpp                  |   0
 unet.hpp => src/unet.hpp                      |   0
 upscaler.cpp => src/upscaler.cpp              |   0
 util.cpp => src/util.cpp                      |   0
 util.h => src/util.h                          |   0
 vae.hpp => src/vae.hpp                        |   0
 version.cpp => src/version.cpp                |   0
 vocab.hpp => src/vocab.hpp                    |   0
 vocab_mistral.hpp => src/vocab_mistral.hpp    |   0
 vocab_qwen.hpp => src/vocab_qwen.hpp          |   0
 vocab_umt5.hpp => src/vocab_umt5.hpp          |   0
 wan.hpp => src/wan.hpp                        |   0
 z_image.hpp => src/z_image.hpp                |   0
 51 files changed, 327 insertions(+), 326 deletions(-)
 rename stable-diffusion.h => include/stable-diffusion.h (100%)
 rename face_detect.py => script/face_detect.py (97%)
 rename cache_dit.hpp => src/cache_dit.hpp (100%)
 rename clip.hpp => src/clip.hpp (100%)
 rename common.hpp => src/common.hpp (100%)
 rename conditioner.hpp => src/conditioner.hpp (100%)
 rename control.hpp => src/control.hpp (100%)
 rename denoiser.hpp => src/denoiser.hpp (100%)
 rename diffusion_model.hpp => src/diffusion_model.hpp (100%)
 rename easycache.hpp => src/easycache.hpp (100%)
 rename esrgan.hpp => src/esrgan.hpp (100%)
 rename flux.hpp => src/flux.hpp (100%)
 rename ggml_extend.hpp => src/ggml_extend.hpp (100%)
 rename gguf_reader.hpp => src/gguf_reader.hpp (100%)
 rename gits_noise.inl => src/gits_noise.inl (100%)
 rename latent-preview.h => src/latent-preview.h (97%)
 rename llm.hpp => src/llm.hpp (100%)
 rename lora.hpp => src/lora.hpp (100%)
 rename ltxv.hpp => src/ltxv.hpp (100%)
 rename mmdit.hpp => src/mmdit.hpp (100%)
 rename model.cpp => src/model.cpp (100%)
 rename model.h => src/model.h (100%)
 rename name_conversion.cpp => src/name_conversion.cpp (100%)
 rename name_conversion.h => src/name_conversion.h (100%)
 rename ordered_map.hpp => src/ordered_map.hpp (100%)
 rename pmid.hpp => src/pmid.hpp (100%)
 rename preprocessing.hpp => src/preprocessing.hpp (100%)
 rename qwen_image.hpp => src/qwen_image.hpp (100%)
 rename rng.hpp => src/rng.hpp (100%)
 rename rng_mt19937.hpp => src/rng_mt19937.hpp (100%)
 rename rng_philox.hpp => src/rng_philox.hpp (100%)
 rename rope.hpp => src/rope.hpp (100%)
 rename stable-diffusion.cpp => src/stable-diffusion.cpp (100%)
 rename t5.hpp => src/t5.hpp (100%)
 rename tae.hpp => src/tae.hpp (100%)
 rename tokenize_util.cpp => src/tokenize_util.cpp (100%)
 rename tokenize_util.h => src/tokenize_util.h (100%)
 rename ucache.hpp => src/ucache.hpp (100%)
 rename unet.hpp => src/unet.hpp (100%)
 rename upscaler.cpp => src/upscaler.cpp (100%)
 rename util.cpp => src/util.cpp (100%)
 rename util.h => src/util.h (100%)
 rename vae.hpp => src/vae.hpp (100%)
 rename version.cpp => src/version.cpp (100%)
 rename vocab.hpp => src/vocab.hpp (100%)
 rename vocab_mistral.hpp => src/vocab_mistral.hpp (100%)
 rename vocab_qwen.hpp => src/vocab_qwen.hpp (100%)
 rename vocab_umt5.hpp => src/vocab_umt5.hpp (100%)
 rename wan.hpp => src/wan.hpp (100%)
 rename z_image.hpp => src/z_image.hpp (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e731d95..7b77225 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,9 +87,9 @@ endif()
 set(SD_LIB stable-diffusion)
 
 file(GLOB SD_LIB_SOURCES
-    "*.h"
-    "*.cpp"
-    "*.hpp"
+    "src/*.h"
+    "src/*.cpp"
+    "src/*.hpp"
 )
 
 find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
@@ -119,7 +119,7 @@ endif()
 message(STATUS "stable-diffusion.cpp commit ${SDCPP_BUILD_COMMIT}")
 
 set_property(
-  SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp
+  SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/version.cpp
   APPEND PROPERTY COMPILE_DEFINITIONS
   SDCPP_BUILD_COMMIT=${SDCPP_BUILD_COMMIT} SDCPP_BUILD_VERSION=${SDCPP_BUILD_VERSION}
 )
@@ -182,6 +182,7 @@ endif()
 add_subdirectory(thirdparty)
 
 target_link_libraries(${SD_LIB} PUBLIC ggml zip)
+target_include_directories(${SD_LIB} PUBLIC . include)
 target_include_directories(${SD_LIB} PUBLIC . thirdparty)
 target_compile_features(${SD_LIB} PUBLIC c_std_11 cxx_std_17)
 
@@ -190,7 +191,7 @@ if (SD_BUILD_EXAMPLES)
     add_subdirectory(examples)
 endif()
 
-set(SD_PUBLIC_HEADERS stable-diffusion.h)
+set(SD_PUBLIC_HEADERS include/stable-diffusion.h)
 set_target_properties(${SD_LIB} PROPERTIES PUBLIC_HEADER "${SD_PUBLIC_HEADERS}")
 
 install(TARGETS ${SD_LIB} LIBRARY PUBLIC_HEADER)
diff --git a/stable-diffusion.h b/include/stable-diffusion.h
similarity index 100%
rename from stable-diffusion.h
rename to include/stable-diffusion.h
diff --git a/face_detect.py b/script/face_detect.py
similarity index 97%
rename from face_detect.py
rename to script/face_detect.py
index 7131af3..e7a3eae 100644
--- a/face_detect.py
+++ b/script/face_detect.py
@@ -1,88 +1,88 @@
-import os
-import sys
-
-import numpy as np
-import torch
-from diffusers.utils import load_image
-# pip install insightface==0.7.3
-from insightface.app import FaceAnalysis
-from insightface.data import get_image as ins_get_image
-from safetensors.torch import save_file
-
-### 
-# https://github.com/cubiq/ComfyUI_IPAdapter_plus/issues/165#issue-2055829543
-###
-class FaceAnalysis2(FaceAnalysis):
-    # NOTE: allows setting det_size for each detection call.
-    # the model allows it but the wrapping code from insightface
-    # doesn't show it, and people end up loading duplicate models
-    # for different sizes where there is absolutely no need to
-    def get(self, img, max_num=0, det_size=(640, 640)):
-        if det_size is not None:
-            self.det_model.input_size = det_size
-
-        return super().get(img, max_num)
-
-def analyze_faces(face_analysis: FaceAnalysis, img_data: np.ndarray, det_size=(640, 640)):
-    # NOTE: try detect faces, if no faces detected, lower det_size until it does
-    detection_sizes = [None] + [(size, size) for size in range(640, 256, -64)] + [(256, 256)]
-
-    for size in detection_sizes:
-        faces = face_analysis.get(img_data, det_size=size)
-        if len(faces) > 0:
-            return faces
-
-    return []
-
-if __name__ == "__main__":
-    #face_detector = FaceAnalysis2(providers=['CUDAExecutionProvider'], allowed_modules=['detection', 'recognition'])
-    face_detector = FaceAnalysis2(providers=['CPUExecutionProvider'], allowed_modules=['detection', 'recognition'])
-    face_detector.prepare(ctx_id=0, det_size=(640, 640))
-    #input_folder_name = './scarletthead_woman'
-    input_folder_name = sys.argv[1]
-    image_basename_list = os.listdir(input_folder_name)
-    image_path_list = sorted([os.path.join(input_folder_name, basename) for basename in image_basename_list])
-
-    input_id_images = []
-    for image_path in image_path_list:
-        input_id_images.append(load_image(image_path))
-    
-    id_embed_list = []
-    
-    for img in input_id_images:
-        img = np.array(img)
-        img = img[:, :, ::-1]
-        faces = analyze_faces(face_detector, img)
-        if len(faces) > 0:
-            id_embed_list.append(torch.from_numpy((faces[0]['embedding'])))
-    
-    if len(id_embed_list) == 0:
-        raise ValueError(f"No face detected in input image pool")
-    
-    id_embeds = torch.stack(id_embed_list)    
-    
-    # for r in id_embeds:
-    #     print(r)
-    # #torch.save(id_embeds, input_folder_name+'/id_embeds.pt');
-    # weights = dict()
-    # weights["id_embeds"] = id_embeds
-    # save_file(weights, input_folder_name+'/id_embeds.safetensors')
-
-    binary_data = id_embeds.numpy().tobytes()
-    two = 4
-    zero = 0
-    one = 1
-    tensor_name = "id_embeds"
-# Write binary data to a file
-    with open(input_folder_name+'/id_embeds.bin', "wb") as f:
-        f.write(two.to_bytes(4, byteorder='little'))
-        f.write((len(tensor_name)).to_bytes(4, byteorder='little'))
-        f.write(zero.to_bytes(4, byteorder='little'))
-        f.write((id_embeds.shape[1]).to_bytes(4, byteorder='little'))
-        f.write((id_embeds.shape[0]).to_bytes(4, byteorder='little'))
-        f.write(one.to_bytes(4, byteorder='little'))
-        f.write(one.to_bytes(4, byteorder='little'))
-        f.write(tensor_name.encode('ascii'))
-        f.write(binary_data)
-
+import os
+import sys
+
+import numpy as np
+import torch
+from diffusers.utils import load_image
+# pip install insightface==0.7.3
+from insightface.app import FaceAnalysis
+from insightface.data import get_image as ins_get_image
+from safetensors.torch import save_file
+
+### 
+# https://github.com/cubiq/ComfyUI_IPAdapter_plus/issues/165#issue-2055829543
+###
+class FaceAnalysis2(FaceAnalysis):
+    # NOTE: allows setting det_size for each detection call.
+    # the model allows it but the wrapping code from insightface
+    # doesn't show it, and people end up loading duplicate models
+    # for different sizes where there is absolutely no need to
+    def get(self, img, max_num=0, det_size=(640, 640)):
+        if det_size is not None:
+            self.det_model.input_size = det_size
+
+        return super().get(img, max_num)
+
+def analyze_faces(face_analysis: FaceAnalysis, img_data: np.ndarray, det_size=(640, 640)):
+    # NOTE: try detect faces, if no faces detected, lower det_size until it does
+    detection_sizes = [None] + [(size, size) for size in range(640, 256, -64)] + [(256, 256)]
+
+    for size in detection_sizes:
+        faces = face_analysis.get(img_data, det_size=size)
+        if len(faces) > 0:
+            return faces
+
+    return []
+
+if __name__ == "__main__":
+    #face_detector = FaceAnalysis2(providers=['CUDAExecutionProvider'], allowed_modules=['detection', 'recognition'])
+    face_detector = FaceAnalysis2(providers=['CPUExecutionProvider'], allowed_modules=['detection', 'recognition'])
+    face_detector.prepare(ctx_id=0, det_size=(640, 640))
+    #input_folder_name = './scarletthead_woman'
+    input_folder_name = sys.argv[1]
+    image_basename_list = os.listdir(input_folder_name)
+    image_path_list = sorted([os.path.join(input_folder_name, basename) for basename in image_basename_list])
+
+    input_id_images = []
+    for image_path in image_path_list:
+        input_id_images.append(load_image(image_path))
+    
+    id_embed_list = []
+    
+    for img in input_id_images:
+        img = np.array(img)
+        img = img[:, :, ::-1]
+        faces = analyze_faces(face_detector, img)
+        if len(faces) > 0:
+            id_embed_list.append(torch.from_numpy((faces[0]['embedding'])))
+    
+    if len(id_embed_list) == 0:
+        raise ValueError(f"No face detected in input image pool")
+    
+    id_embeds = torch.stack(id_embed_list)    
+    
+    # for r in id_embeds:
+    #     print(r)
+    # #torch.save(id_embeds, input_folder_name+'/id_embeds.pt');
+    # weights = dict()
+    # weights["id_embeds"] = id_embeds
+    # save_file(weights, input_folder_name+'/id_embeds.safetensors')
+
+    binary_data = id_embeds.numpy().tobytes()
+    two = 4
+    zero = 0
+    one = 1
+    tensor_name = "id_embeds"
+# Write binary data to a file
+    with open(input_folder_name+'/id_embeds.bin', "wb") as f:
+        f.write(two.to_bytes(4, byteorder='little'))
+        f.write((len(tensor_name)).to_bytes(4, byteorder='little'))
+        f.write(zero.to_bytes(4, byteorder='little'))
+        f.write((id_embeds.shape[1]).to_bytes(4, byteorder='little'))
+        f.write((id_embeds.shape[0]).to_bytes(4, byteorder='little'))
+        f.write(one.to_bytes(4, byteorder='little'))
+        f.write(one.to_bytes(4, byteorder='little'))
+        f.write(tensor_name.encode('ascii'))
+        f.write(binary_data)
+
     
\ No newline at end of file
diff --git a/cache_dit.hpp b/src/cache_dit.hpp
similarity index 100%
rename from cache_dit.hpp
rename to src/cache_dit.hpp
diff --git a/clip.hpp b/src/clip.hpp
similarity index 100%
rename from clip.hpp
rename to src/clip.hpp
diff --git a/common.hpp b/src/common.hpp
similarity index 100%
rename from common.hpp
rename to src/common.hpp
diff --git a/conditioner.hpp b/src/conditioner.hpp
similarity index 100%
rename from conditioner.hpp
rename to src/conditioner.hpp
diff --git a/control.hpp b/src/control.hpp
similarity index 100%
rename from control.hpp
rename to src/control.hpp
diff --git a/denoiser.hpp b/src/denoiser.hpp
similarity index 100%
rename from denoiser.hpp
rename to src/denoiser.hpp
diff --git a/diffusion_model.hpp b/src/diffusion_model.hpp
similarity index 100%
rename from diffusion_model.hpp
rename to src/diffusion_model.hpp
diff --git a/easycache.hpp b/src/easycache.hpp
similarity index 100%
rename from easycache.hpp
rename to src/easycache.hpp
diff --git a/esrgan.hpp b/src/esrgan.hpp
similarity index 100%
rename from esrgan.hpp
rename to src/esrgan.hpp
diff --git a/flux.hpp b/src/flux.hpp
similarity index 100%
rename from flux.hpp
rename to src/flux.hpp
diff --git a/ggml_extend.hpp b/src/ggml_extend.hpp
similarity index 100%
rename from ggml_extend.hpp
rename to src/ggml_extend.hpp
diff --git a/gguf_reader.hpp b/src/gguf_reader.hpp
similarity index 100%
rename from gguf_reader.hpp
rename to src/gguf_reader.hpp
diff --git a/gits_noise.inl b/src/gits_noise.inl
similarity index 100%
rename from gits_noise.inl
rename to src/gits_noise.inl
diff --git a/latent-preview.h b/src/latent-preview.h
similarity index 97%
rename from latent-preview.h
rename to src/latent-preview.h
index 76e1741..85c8e0d 100644
--- a/latent-preview.h
+++ b/src/latent-preview.h
@@ -1,234 +1,234 @@
-#include <cstddef>
-#include <cstdint>
-#include "ggml.h"
-
-const float wan_21_latent_rgb_proj[16][3] = {
-    {0.015123f, -0.148418f, 0.479828f},
-    {0.003652f, -0.010680f, -0.037142f},
-    {0.212264f, 0.063033f, 0.016779f},
-    {0.232999f, 0.406476f, 0.220125f},
-    {-0.051864f, -0.082384f, -0.069396f},
-    {0.085005f, -0.161492f, 0.010689f},
-    {-0.245369f, -0.506846f, -0.117010f},
-    {-0.151145f, 0.017721f, 0.007207f},
-    {-0.293239f, -0.207936f, -0.421135f},
-    {-0.187721f, 0.050783f, 0.177649f},
-    {-0.013067f, 0.265964f, 0.166578f},
-    {0.028327f, 0.109329f, 0.108642f},
-    {-0.205343f, 0.043991f, 0.148914f},
-    {0.014307f, -0.048647f, -0.007219f},
-    {0.217150f, 0.053074f, 0.319923f},
-    {0.155357f, 0.083156f, 0.064780f}};
-float wan_21_latent_rgb_bias[3] = {-0.270270f, -0.234976f, -0.456853f};
-
-const float wan_22_latent_rgb_proj[48][3] = {
-    {0.017126f, -0.027230f, -0.019257f},
-    {-0.113739f, -0.028715f, -0.022885f},
-    {-0.000106f, 0.021494f, 0.004629f},
-    {-0.013273f, -0.107137f, -0.033638f},
-    {-0.000381f, 0.000279f, 0.025877f},
-    {-0.014216f, -0.003975f, 0.040528f},
-    {0.001638f, -0.000748f, 0.011022f},
-    {0.029238f, -0.006697f, 0.035933f},
-    {0.021641f, -0.015874f, 0.040531f},
-    {-0.101984f, -0.070160f, -0.028855f},
-    {0.033207f, -0.021068f, 0.002663f},
-    {-0.104711f, 0.121673f, 0.102981f},
-    {0.082647f, -0.004991f, 0.057237f},
-    {-0.027375f, 0.031581f, 0.006868f},
-    {-0.045434f, 0.029444f, 0.019287f},
-    {-0.046572f, -0.012537f, 0.006675f},
-    {0.074709f, 0.033690f, 0.025289f},
-    {-0.008251f, -0.002745f, -0.006999f},
-    {0.012685f, -0.061856f, -0.048658f},
-    {0.042304f, -0.007039f, 0.000295f},
-    {-0.007644f, -0.060843f, -0.033142f},
-    {0.159909f, 0.045628f, 0.367541f},
-    {0.095171f, 0.086438f, 0.010271f},
-    {0.006812f, 0.019643f, 0.029637f},
-    {0.003467f, -0.010705f, 0.014252f},
-    {-0.099681f, -0.066272f, -0.006243f},
-    {0.047357f, 0.037040f, 0.000185f},
-    {-0.041797f, -0.089225f, -0.032257f},
-    {0.008928f, 0.017028f, 0.018684f},
-    {-0.042255f, 0.016045f, 0.006849f},
-    {0.011268f, 0.036462f, 0.037387f},
-    {0.011553f, -0.016375f, -0.048589f},
-    {0.046266f, -0.027189f, 0.056979f},
-    {0.009640f, -0.017576f, 0.030324f},
-    {-0.045794f, -0.036083f, -0.010616f},
-    {0.022418f, 0.039783f, -0.032939f},
-    {-0.052714f, -0.015525f, 0.007438f},
-    {0.193004f, 0.223541f, 0.264175f},
-    {-0.059406f, -0.008188f, 0.022867f},
-    {-0.156742f, -0.263791f, -0.007385f},
-    {-0.015717f, 0.016570f, 0.033969f},
-    {0.037969f, 0.109835f, 0.200449f},
-    {-0.000782f, -0.009566f, -0.008058f},
-    {0.010709f, 0.052960f, -0.044195f},
-    {0.017271f, 0.045839f, 0.034569f},
-    {0.009424f, 0.013088f, -0.001714f},
-    {-0.024805f, -0.059378f, -0.033756f},
-    {-0.078293f, 0.029070f, 0.026129f}};
-float wan_22_latent_rgb_bias[3] = {0.013160f, -0.096492f, -0.071323f};
-
-const float flux_latent_rgb_proj[16][3] = {
-    {-0.041168f, 0.019917f, 0.097253f},
-    {0.028096f, 0.026730f, 0.129576f},
-    {0.065618f, -0.067950f, -0.014651f},
-    {-0.012998f, -0.014762f, 0.081251f},
-    {0.078567f, 0.059296f, -0.024687f},
-    {-0.015987f, -0.003697f, 0.005012f},
-    {0.033605f, 0.138999f, 0.068517f},
-    {-0.024450f, -0.063567f, -0.030101f},
-    {-0.040194f, -0.016710f, 0.127185f},
-    {0.112681f, 0.088764f, -0.041940f},
-    {-0.023498f, 0.093664f, 0.025543f},
-    {0.082899f, 0.048320f, 0.007491f},
-    {0.075712f, 0.074139f, 0.081965f},
-    {-0.143501f, 0.018263f, -0.136138f},
-    {-0.025767f, -0.082035f, -0.040023f},
-    {-0.111849f, -0.055589f, -0.032361f}};
-float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f};
-
-const float flux2_latent_rgb_proj[32][3] = {
-    {0.000736f, -0.008385f, -0.019710f},
-    {-0.001352f, -0.016392f, 0.020693f},
-    {-0.006376f, 0.002428f, 0.036736f},
-    {0.039384f, 0.074167f, 0.119789f},
-    {0.007464f, -0.005705f, -0.004734f},
-    {-0.004086f, 0.005287f, -0.000409f},
-    {-0.032835f, 0.050802f, -0.028120f},
-    {-0.003158f, -0.000835f, 0.000406f},
-    {-0.112840f, -0.084337f, -0.023083f},
-    {0.001462f, -0.006656f, 0.000549f},
-    {-0.009980f, -0.007480f, 0.009702f},
-    {0.032540f, 0.000214f, -0.061388f},
-    {0.011023f, 0.000694f, 0.007143f},
-    {-0.001468f, -0.006723f, -0.001678f},
-    {-0.005921f, -0.010320f, -0.003907f},
-    {-0.028434f, 0.027584f, 0.018457f},
-    {0.014349f, 0.011523f, 0.000441f},
-    {0.009874f, 0.003081f, 0.001507f},
-    {0.002218f, 0.005712f, 0.001563f},
-    {0.053010f, -0.019844f, 0.008683f},
-    {-0.002507f, 0.005384f, 0.000938f},
-    {-0.002177f, -0.011366f, 0.003559f},
-    {-0.000261f, 0.015121f, -0.003240f},
-    {-0.003944f, -0.002083f, 0.005043f},
-    {-0.009138f, 0.011336f, 0.003781f},
-    {0.011429f, 0.003985f, -0.003855f},
-    {0.010518f, -0.005586f, 0.010131f},
-    {0.007883f, 0.002912f, -0.001473f},
-    {-0.003318f, -0.003160f, 0.003684f},
-    {-0.034560f, -0.008740f, 0.012996f},
-    {0.000166f, 0.001079f, -0.012153f},
-    {0.017772f, 0.000937f, -0.011953f}};
-float flux2_latent_rgb_bias[3] = {-0.028738f, -0.098463f, -0.107619f};
-
-// This one was taken straight from
-// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
-// (MiT Licence)
-const float sd3_latent_rgb_proj[16][3] = {
-    {-0.0645f, 0.0177f, 0.1052f},
-    {0.0028f, 0.0312f, 0.0650f},
-    {0.1848f, 0.0762f, 0.0360f},
-    {0.0944f, 0.0360f, 0.0889f},
-    {0.0897f, 0.0506f, -0.0364f},
-    {-0.0020f, 0.1203f, 0.0284f},
-    {0.0855f, 0.0118f, 0.0283f},
-    {-0.0539f, 0.0658f, 0.1047f},
-    {-0.0057f, 0.0116f, 0.0700f},
-    {-0.0412f, 0.0281f, -0.0039f},
-    {0.1106f, 0.1171f, 0.1220f},
-    {-0.0248f, 0.0682f, -0.0481f},
-    {0.0815f, 0.0846f, 0.1207f},
-    {-0.0120f, -0.0055f, -0.0867f},
-    {-0.0749f, -0.0634f, -0.0456f},
-    {-0.1418f, -0.1457f, -0.1259f},
-};
-float sd3_latent_rgb_bias[3] = {0, 0, 0};
-
-const float sdxl_latent_rgb_proj[4][3] = {
-    {0.258303f, 0.277640f, 0.329699f},
-    {-0.299701f, 0.105446f, 0.014194f},
-    {0.050522f, 0.186163f, -0.143257f},
-    {-0.211938f, -0.149892f, -0.080036f}};
-float sdxl_latent_rgb_bias[3] = {0.144381f, -0.033313f, 0.007061f};
-
-const float sd_latent_rgb_proj[4][3] = {
-    {0.337366f, 0.216344f, 0.257386f},
-    {0.165636f, 0.386828f, 0.046994f},
-    {-0.267803f, 0.237036f, 0.223517f},
-    {-0.178022f, -0.200862f, -0.678514f}};
-float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f};
-
-void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int patch_size) {
-    size_t buffer_head = 0;
-
-    uint32_t latent_width  = static_cast<uint32_t>(latents->ne[0]);
-    uint32_t latent_height = static_cast<uint32_t>(latents->ne[1]);
-    uint32_t dim           = static_cast<uint32_t>(latents->ne[ggml_n_dims(latents) - 1]);
-    uint32_t frames        = 1;
-    if (ggml_n_dims(latents) == 4) {
-        frames = static_cast<uint32_t>(latents->ne[2]);
-    }
-
-    uint32_t rgb_width  = latent_width * patch_size;
-    uint32_t rgb_height = latent_height * patch_size;
-
-    uint32_t unpatched_dim = dim / (patch_size * patch_size);
-
-    for (uint32_t k = 0; k < frames; k++) {
-        for (uint32_t rgb_x = 0; rgb_x < rgb_width; rgb_x++) {
-            for (uint32_t rgb_y = 0; rgb_y < rgb_height; rgb_y++) {
-                int latent_x = rgb_x / patch_size;
-                int latent_y = rgb_y / patch_size;
-
-                int channel_offset = 0;
-                if (patch_size > 1) {
-                    channel_offset = ((rgb_y % patch_size) * patch_size + (rgb_x % patch_size));
-                }
-
-                size_t latent_id = (latent_x * latents->nb[0] + latent_y * latents->nb[1] + k * latents->nb[2]);
-
-                // should be incremented by 1 for each pixel
-                size_t pixel_id = k * rgb_width * rgb_height + rgb_y * rgb_width + rgb_x;
-
-                float r = 0, g = 0, b = 0;
-                if (latent_rgb_proj != nullptr) {
-                    for (uint32_t d = 0; d < unpatched_dim; d++) {
-                        float value = *(float*)((char*)latents->data + latent_id + (d * patch_size * patch_size + channel_offset) * latents->nb[ggml_n_dims(latents) - 1]);
-                        r += value * latent_rgb_proj[d][0];
-                        g += value * latent_rgb_proj[d][1];
-                        b += value * latent_rgb_proj[d][2];
-                    }
-                } else {
-                    // interpret first 3 channels as RGB
-                    r = *(float*)((char*)latents->data + latent_id + 0 * latents->nb[ggml_n_dims(latents) - 1]);
-                    g = *(float*)((char*)latents->data + latent_id + 1 * latents->nb[ggml_n_dims(latents) - 1]);
-                    b = *(float*)((char*)latents->data + latent_id + 2 * latents->nb[ggml_n_dims(latents) - 1]);
-                }
-                if (latent_rgb_bias != nullptr) {
-                    // bias
-                    r += latent_rgb_bias[0];
-                    g += latent_rgb_bias[1];
-                    b += latent_rgb_bias[2];
-                }
-                // change range
-                r = r * .5f + .5f;
-                g = g * .5f + .5f;
-                b = b * .5f + .5f;
-
-                // clamp rgb values to [0,1] range
-                r = r >= 0 ? r <= 1 ? r : 1 : 0;
-                g = g >= 0 ? g <= 1 ? g : 1 : 0;
-                b = b >= 0 ? b <= 1 ? b : 1 : 0;
-
-                buffer[pixel_id * 3 + 0] = (uint8_t)(r * 255);
-                buffer[pixel_id * 3 + 1] = (uint8_t)(g * 255);
-                buffer[pixel_id * 3 + 2] = (uint8_t)(b * 255);
-            }
-        }
-    }
-}
+#include <cstddef>
+#include <cstdint>
+#include "ggml.h"
+
+const float wan_21_latent_rgb_proj[16][3] = {
+    {0.015123f, -0.148418f, 0.479828f},
+    {0.003652f, -0.010680f, -0.037142f},
+    {0.212264f, 0.063033f, 0.016779f},
+    {0.232999f, 0.406476f, 0.220125f},
+    {-0.051864f, -0.082384f, -0.069396f},
+    {0.085005f, -0.161492f, 0.010689f},
+    {-0.245369f, -0.506846f, -0.117010f},
+    {-0.151145f, 0.017721f, 0.007207f},
+    {-0.293239f, -0.207936f, -0.421135f},
+    {-0.187721f, 0.050783f, 0.177649f},
+    {-0.013067f, 0.265964f, 0.166578f},
+    {0.028327f, 0.109329f, 0.108642f},
+    {-0.205343f, 0.043991f, 0.148914f},
+    {0.014307f, -0.048647f, -0.007219f},
+    {0.217150f, 0.053074f, 0.319923f},
+    {0.155357f, 0.083156f, 0.064780f}};
+float wan_21_latent_rgb_bias[3] = {-0.270270f, -0.234976f, -0.456853f};
+
+const float wan_22_latent_rgb_proj[48][3] = {
+    {0.017126f, -0.027230f, -0.019257f},
+    {-0.113739f, -0.028715f, -0.022885f},
+    {-0.000106f, 0.021494f, 0.004629f},
+    {-0.013273f, -0.107137f, -0.033638f},
+    {-0.000381f, 0.000279f, 0.025877f},
+    {-0.014216f, -0.003975f, 0.040528f},
+    {0.001638f, -0.000748f, 0.011022f},
+    {0.029238f, -0.006697f, 0.035933f},
+    {0.021641f, -0.015874f, 0.040531f},
+    {-0.101984f, -0.070160f, -0.028855f},
+    {0.033207f, -0.021068f, 0.002663f},
+    {-0.104711f, 0.121673f, 0.102981f},
+    {0.082647f, -0.004991f, 0.057237f},
+    {-0.027375f, 0.031581f, 0.006868f},
+    {-0.045434f, 0.029444f, 0.019287f},
+    {-0.046572f, -0.012537f, 0.006675f},
+    {0.074709f, 0.033690f, 0.025289f},
+    {-0.008251f, -0.002745f, -0.006999f},
+    {0.012685f, -0.061856f, -0.048658f},
+    {0.042304f, -0.007039f, 0.000295f},
+    {-0.007644f, -0.060843f, -0.033142f},
+    {0.159909f, 0.045628f, 0.367541f},
+    {0.095171f, 0.086438f, 0.010271f},
+    {0.006812f, 0.019643f, 0.029637f},
+    {0.003467f, -0.010705f, 0.014252f},
+    {-0.099681f, -0.066272f, -0.006243f},
+    {0.047357f, 0.037040f, 0.000185f},
+    {-0.041797f, -0.089225f, -0.032257f},
+    {0.008928f, 0.017028f, 0.018684f},
+    {-0.042255f, 0.016045f, 0.006849f},
+    {0.011268f, 0.036462f, 0.037387f},
+    {0.011553f, -0.016375f, -0.048589f},
+    {0.046266f, -0.027189f, 0.056979f},
+    {0.009640f, -0.017576f, 0.030324f},
+    {-0.045794f, -0.036083f, -0.010616f},
+    {0.022418f, 0.039783f, -0.032939f},
+    {-0.052714f, -0.015525f, 0.007438f},
+    {0.193004f, 0.223541f, 0.264175f},
+    {-0.059406f, -0.008188f, 0.022867f},
+    {-0.156742f, -0.263791f, -0.007385f},
+    {-0.015717f, 0.016570f, 0.033969f},
+    {0.037969f, 0.109835f, 0.200449f},
+    {-0.000782f, -0.009566f, -0.008058f},
+    {0.010709f, 0.052960f, -0.044195f},
+    {0.017271f, 0.045839f, 0.034569f},
+    {0.009424f, 0.013088f, -0.001714f},
+    {-0.024805f, -0.059378f, -0.033756f},
+    {-0.078293f, 0.029070f, 0.026129f}};
+float wan_22_latent_rgb_bias[3] = {0.013160f, -0.096492f, -0.071323f};
+
+const float flux_latent_rgb_proj[16][3] = {
+    {-0.041168f, 0.019917f, 0.097253f},
+    {0.028096f, 0.026730f, 0.129576f},
+    {0.065618f, -0.067950f, -0.014651f},
+    {-0.012998f, -0.014762f, 0.081251f},
+    {0.078567f, 0.059296f, -0.024687f},
+    {-0.015987f, -0.003697f, 0.005012f},
+    {0.033605f, 0.138999f, 0.068517f},
+    {-0.024450f, -0.063567f, -0.030101f},
+    {-0.040194f, -0.016710f, 0.127185f},
+    {0.112681f, 0.088764f, -0.041940f},
+    {-0.023498f, 0.093664f, 0.025543f},
+    {0.082899f, 0.048320f, 0.007491f},
+    {0.075712f, 0.074139f, 0.081965f},
+    {-0.143501f, 0.018263f, -0.136138f},
+    {-0.025767f, -0.082035f, -0.040023f},
+    {-0.111849f, -0.055589f, -0.032361f}};
+float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f};
+
+const float flux2_latent_rgb_proj[32][3] = {
+    {0.000736f, -0.008385f, -0.019710f},
+    {-0.001352f, -0.016392f, 0.020693f},
+    {-0.006376f, 0.002428f, 0.036736f},
+    {0.039384f, 0.074167f, 0.119789f},
+    {0.007464f, -0.005705f, -0.004734f},
+    {-0.004086f, 0.005287f, -0.000409f},
+    {-0.032835f, 0.050802f, -0.028120f},
+    {-0.003158f, -0.000835f, 0.000406f},
+    {-0.112840f, -0.084337f, -0.023083f},
+    {0.001462f, -0.006656f, 0.000549f},
+    {-0.009980f, -0.007480f, 0.009702f},
+    {0.032540f, 0.000214f, -0.061388f},
+    {0.011023f, 0.000694f, 0.007143f},
+    {-0.001468f, -0.006723f, -0.001678f},
+    {-0.005921f, -0.010320f, -0.003907f},
+    {-0.028434f, 0.027584f, 0.018457f},
+    {0.014349f, 0.011523f, 0.000441f},
+    {0.009874f, 0.003081f, 0.001507f},
+    {0.002218f, 0.005712f, 0.001563f},
+    {0.053010f, -0.019844f, 0.008683f},
+    {-0.002507f, 0.005384f, 0.000938f},
+    {-0.002177f, -0.011366f, 0.003559f},
+    {-0.000261f, 0.015121f, -0.003240f},
+    {-0.003944f, -0.002083f, 0.005043f},
+    {-0.009138f, 0.011336f, 0.003781f},
+    {0.011429f, 0.003985f, -0.003855f},
+    {0.010518f, -0.005586f, 0.010131f},
+    {0.007883f, 0.002912f, -0.001473f},
+    {-0.003318f, -0.003160f, 0.003684f},
+    {-0.034560f, -0.008740f, 0.012996f},
+    {0.000166f, 0.001079f, -0.012153f},
+    {0.017772f, 0.000937f, -0.011953f}};
+float flux2_latent_rgb_bias[3] = {-0.028738f, -0.098463f, -0.107619f};
+
+// This one was taken straight from
+// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
+// (MiT Licence)
+const float sd3_latent_rgb_proj[16][3] = {
+    {-0.0645f, 0.0177f, 0.1052f},
+    {0.0028f, 0.0312f, 0.0650f},
+    {0.1848f, 0.0762f, 0.0360f},
+    {0.0944f, 0.0360f, 0.0889f},
+    {0.0897f, 0.0506f, -0.0364f},
+    {-0.0020f, 0.1203f, 0.0284f},
+    {0.0855f, 0.0118f, 0.0283f},
+    {-0.0539f, 0.0658f, 0.1047f},
+    {-0.0057f, 0.0116f, 0.0700f},
+    {-0.0412f, 0.0281f, -0.0039f},
+    {0.1106f, 0.1171f, 0.1220f},
+    {-0.0248f, 0.0682f, -0.0481f},
+    {0.0815f, 0.0846f, 0.1207f},
+    {-0.0120f, -0.0055f, -0.0867f},
+    {-0.0749f, -0.0634f, -0.0456f},
+    {-0.1418f, -0.1457f, -0.1259f},
+};
+float sd3_latent_rgb_bias[3] = {0, 0, 0};
+
+const float sdxl_latent_rgb_proj[4][3] = {  // 4 rows of {r, g, b} weights, the layout preview_latent_video() reads as latent_rgb_proj[d][0..2]
+    {0.258303f, 0.277640f, 0.329699f},
+    {-0.299701f, 0.105446f, 0.014194f},
+    {0.050522f, 0.186163f, -0.143257f},
+    {-0.211938f, -0.149892f, -0.080036f}};
+float sdxl_latent_rgb_bias[3] = {0.144381f, -0.033313f, 0.007061f};  // {r, g, b} offsets; presumably used together with sdxl_latent_rgb_proj - confirm at call site
+
+const float sd_latent_rgb_proj[4][3] = {  // 4 rows of {r, g, b} weights, the layout preview_latent_video() reads as latent_rgb_proj[d][0..2]
+    {0.337366f, 0.216344f, 0.257386f},
+    {0.165636f, 0.386828f, 0.046994f},
+    {-0.267803f, 0.237036f, 0.223517f},
+    {-0.178022f, -0.200862f, -0.678514f}};
+float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f};  // {r, g, b} offsets; presumably used together with sd_latent_rgb_proj - confirm at call site
+
+// Render a quick RGB preview of `latents` into `buffer`: 3 bytes per pixel, frame-major, then row-major.
+// `latent_rgb_proj` holds one {r, g, b} weight row per latent channel (nullptr: channels 0..2 are read as r, g, b);
+// `latent_rgb_bias` is an optional {r, g, b} offset; `patch_size` > 1 unpacks patch pixels stored along the channel axis.
+void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int patch_size) {
+    if (patch_size <= 0) {
+        return;  // avoids dividing by zero below; there is nothing meaningful to draw
+    }
+    // the channel axis is the last one reported by ggml_n_dims(); with 4 dims, axis 2 holds the frames
+    const int    channel_axis   = ggml_n_dims(latents) - 1;
+    const size_t channel_stride = latents->nb[channel_axis];
+    const char*  data           = (const char*)latents->data;
+    uint32_t latent_width  = static_cast<uint32_t>(latents->ne[0]);
+    uint32_t latent_height = static_cast<uint32_t>(latents->ne[1]);
+    uint32_t dim           = static_cast<uint32_t>(latents->ne[channel_axis]);
+    uint32_t frames        = channel_axis == 3 ? static_cast<uint32_t>(latents->ne[2]) : 1;
+
+    uint32_t rgb_width  = latent_width * patch_size;
+    uint32_t rgb_height = latent_height * patch_size;
+    uint32_t unpatched_dim = dim / (patch_size * patch_size);
+
+    for (uint32_t k = 0; k < frames; k++) {
+        // rows are the outer loop so pixel_id grows by exactly 1 per iteration (sequential writes)
+        for (uint32_t rgb_y = 0; rgb_y < rgb_height; rgb_y++) {
+            for (uint32_t rgb_x = 0; rgb_x < rgb_width; rgb_x++) {
+                int latent_x = rgb_x / patch_size;
+                int latent_y = rgb_y / patch_size;
+                // index of this pixel inside its patch; selects the sub-channel when patch_size > 1
+                int channel_offset = 0;
+                if (patch_size > 1) {
+                    channel_offset = ((rgb_y % patch_size) * patch_size + (rgb_x % patch_size));
+                }
+
+                size_t latent_id = (latent_x * latents->nb[0] + latent_y * latents->nb[1] + k * latents->nb[2]);
+                size_t pixel_id  = (static_cast<size_t>(k) * rgb_height + rgb_y) * rgb_width + rgb_x;
+
+                float r = 0, g = 0, b = 0;
+                if (latent_rgb_proj != nullptr) {
+                    for (uint32_t d = 0; d < unpatched_dim; d++) {
+                        float value = *(const float*)(data + latent_id + (d * patch_size * patch_size + channel_offset) * channel_stride);
+                        r += value * latent_rgb_proj[d][0];
+                        g += value * latent_rgb_proj[d][1];
+                        b += value * latent_rgb_proj[d][2];
+                    }
+                } else {
+                    // interpret first 3 channels as RGB
+                    r = *(const float*)(data + latent_id + 0 * channel_stride);
+                    g = *(const float*)(data + latent_id + 1 * channel_stride);
+                    b = *(const float*)(data + latent_id + 2 * channel_stride);
+                }
+                if (latent_rgb_bias != nullptr) {
+                    r += latent_rgb_bias[0];
+                    g += latent_rgb_bias[1];
+                    b += latent_rgb_bias[2];
+                }
+                // change range: maps [-1, 1] onto [0, 1]
+                r = r * .5f + .5f;
+                g = g * .5f + .5f;
+                b = b * .5f + .5f;
+                // clamp rgb values to [0,1] range
+                r = r >= 0 ? r <= 1 ? r : 1 : 0;
+                g = g >= 0 ? g <= 1 ? g : 1 : 0;
+                b = b >= 0 ? b <= 1 ? b : 1 : 0;
+                buffer[pixel_id * 3 + 0] = (uint8_t)(r * 255);
+                buffer[pixel_id * 3 + 1] = (uint8_t)(g * 255);
+                buffer[pixel_id * 3 + 2] = (uint8_t)(b * 255);
+            }
+        }
+    }
+}
diff --git a/llm.hpp b/src/llm.hpp
similarity index 100%
rename from llm.hpp
rename to src/llm.hpp
diff --git a/lora.hpp b/src/lora.hpp
similarity index 100%
rename from lora.hpp
rename to src/lora.hpp
diff --git a/ltxv.hpp b/src/ltxv.hpp
similarity index 100%
rename from ltxv.hpp
rename to src/ltxv.hpp
diff --git a/mmdit.hpp b/src/mmdit.hpp
similarity index 100%
rename from mmdit.hpp
rename to src/mmdit.hpp
diff --git a/model.cpp b/src/model.cpp
similarity index 100%
rename from model.cpp
rename to src/model.cpp
diff --git a/model.h b/src/model.h
similarity index 100%
rename from model.h
rename to src/model.h
diff --git a/name_conversion.cpp b/src/name_conversion.cpp
similarity index 100%
rename from name_conversion.cpp
rename to src/name_conversion.cpp
diff --git a/name_conversion.h b/src/name_conversion.h
similarity index 100%
rename from name_conversion.h
rename to src/name_conversion.h
diff --git a/ordered_map.hpp b/src/ordered_map.hpp
similarity index 100%
rename from ordered_map.hpp
rename to src/ordered_map.hpp
diff --git a/pmid.hpp b/src/pmid.hpp
similarity index 100%
rename from pmid.hpp
rename to src/pmid.hpp
diff --git a/preprocessing.hpp b/src/preprocessing.hpp
similarity index 100%
rename from preprocessing.hpp
rename to src/preprocessing.hpp
diff --git a/qwen_image.hpp b/src/qwen_image.hpp
similarity index 100%
rename from qwen_image.hpp
rename to src/qwen_image.hpp
diff --git a/rng.hpp b/src/rng.hpp
similarity index 100%
rename from rng.hpp
rename to src/rng.hpp
diff --git a/rng_mt19937.hpp b/src/rng_mt19937.hpp
similarity index 100%
rename from rng_mt19937.hpp
rename to src/rng_mt19937.hpp
diff --git a/rng_philox.hpp b/src/rng_philox.hpp
similarity index 100%
rename from rng_philox.hpp
rename to src/rng_philox.hpp
diff --git a/rope.hpp b/src/rope.hpp
similarity index 100%
rename from rope.hpp
rename to src/rope.hpp
diff --git a/stable-diffusion.cpp b/src/stable-diffusion.cpp
similarity index 100%
rename from stable-diffusion.cpp
rename to src/stable-diffusion.cpp
diff --git a/t5.hpp b/src/t5.hpp
similarity index 100%
rename from t5.hpp
rename to src/t5.hpp
diff --git a/tae.hpp b/src/tae.hpp
similarity index 100%
rename from tae.hpp
rename to src/tae.hpp
diff --git a/tokenize_util.cpp b/src/tokenize_util.cpp
similarity index 100%
rename from tokenize_util.cpp
rename to src/tokenize_util.cpp
diff --git a/tokenize_util.h b/src/tokenize_util.h
similarity index 100%
rename from tokenize_util.h
rename to src/tokenize_util.h
diff --git a/ucache.hpp b/src/ucache.hpp
similarity index 100%
rename from ucache.hpp
rename to src/ucache.hpp
diff --git a/unet.hpp b/src/unet.hpp
similarity index 100%
rename from unet.hpp
rename to src/unet.hpp
diff --git a/upscaler.cpp b/src/upscaler.cpp
similarity index 100%
rename from upscaler.cpp
rename to src/upscaler.cpp
diff --git a/util.cpp b/src/util.cpp
similarity index 100%
rename from util.cpp
rename to src/util.cpp
diff --git a/util.h b/src/util.h
similarity index 100%
rename from util.h
rename to src/util.h
diff --git a/vae.hpp b/src/vae.hpp
similarity index 100%
rename from vae.hpp
rename to src/vae.hpp
diff --git a/version.cpp b/src/version.cpp
similarity index 100%
rename from version.cpp
rename to src/version.cpp
diff --git a/vocab.hpp b/src/vocab.hpp
similarity index 100%
rename from vocab.hpp
rename to src/vocab.hpp
diff --git a/vocab_mistral.hpp b/src/vocab_mistral.hpp
similarity index 100%
rename from vocab_mistral.hpp
rename to src/vocab_mistral.hpp
diff --git a/vocab_qwen.hpp b/src/vocab_qwen.hpp
similarity index 100%
rename from vocab_qwen.hpp
rename to src/vocab_qwen.hpp
diff --git a/vocab_umt5.hpp b/src/vocab_umt5.hpp
similarity index 100%
rename from vocab_umt5.hpp
rename to src/vocab_umt5.hpp
diff --git a/wan.hpp b/src/wan.hpp
similarity index 100%
rename from wan.hpp
rename to src/wan.hpp
diff --git a/z_image.hpp b/src/z_image.hpp
similarity index 100%
rename from z_image.hpp
rename to src/z_image.hpp