Compare commits

c2d8ffc22c030bbbd00c9431dc08db43fe2a3ff5..8f6c5c217b1f6f27a8aa5fb78d3390fa849fc96a

No commits in common. "c2d8ffc22c030bbbd00c9431dc08db43fe2a3ff5" and "8f6c5c217b1f6f27a8aa5fb78d3390fa849fc96a" have entirely different histories.

4 changed files with 2 additions and 39 deletions

@@ -410,22 +410,6 @@ protected:
     int64_t context_dim = 768;  // hidden_size, 1024 for VERSION_SD2
     bool use_linear     = false;
 
-    void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") {
-        auto iter = tensor_storage_map.find(prefix + "proj_out.weight");
-        if (iter != tensor_storage_map.end()) {
-            int64_t inner_dim = n_head * d_head;
-            if (iter->second.n_dims == 4 && use_linear) {
-                use_linear         = false;
-                blocks["proj_in"]  = std::make_shared<Conv2d>(in_channels, inner_dim, std::pair{1, 1});
-                blocks["proj_out"] = std::make_shared<Conv2d>(inner_dim, in_channels, std::pair{1, 1});
-            } else if (iter->second.n_dims == 2 && !use_linear) {
-                use_linear         = true;
-                blocks["proj_in"]  = std::make_shared<Linear>(in_channels, inner_dim);
-                blocks["proj_out"] = std::make_shared<Linear>(inner_dim, in_channels);
-            }
-        }
-    }
-
 public:
     SpatialTransformer(int64_t in_channels,
                        int64_t n_head,
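
The removed init_params override is what let SpatialTransformer adapt to the checkpoint: a 4-D proj_out.weight means the projections were stored as 1x1 Conv2d kernels, a 2-D one means Linear weights, and the blocks were rebuilt to match. A minimal standalone sketch of that detection, where TensorInfo and TensorMap are hypothetical stand-ins for the repo's String2TensorStorage types:

    // Sketch only; TensorInfo/TensorMap are hypothetical stand-ins, not the repo's API.
    #include <map>
    #include <optional>
    #include <string>

    struct TensorInfo {
        int n_dims;  // 4 => Conv2d kernel [OC, IC, 1, 1]; 2 => Linear weight [OC, IC]
    };
    using TensorMap = std::map<std::string, TensorInfo>;

    // True/false when proj_out.weight is present (2-D/4-D); nullopt when the
    // tensor is absent, in which case the caller keeps its default layout.
    std::optional<bool> stored_as_linear(const TensorMap& tensors, const std::string& prefix) {
        auto iter = tensors.find(prefix + "proj_out.weight");
        if (iter == tensors.end())
            return std::nullopt;
        return iter->second.n_dims == 2;
    }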

@@ -1926,8 +1926,8 @@ public:
         if (prefix.size() > 0) {
             prefix = prefix + ".";
         }
-        init_params(ctx, tensor_storage_map, prefix);
         init_blocks(ctx, tensor_storage_map, prefix);
+        init_params(ctx, tensor_storage_map, prefix);
     }
 
     size_t get_params_num() {
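
This hunk only moves one call: init_params now runs after init_blocks. A plausible reading is an ordering dependency, since parameter setup that inspects or replaces child blocks needs those blocks to exist first. A minimal sketch of such a two-phase initializer, with Block as a hypothetical stand-in for the real base class:

    // Hypothetical two-phase init; Block stands in for the actual base class.
    struct Block {
        virtual ~Block()           = default;
        virtual void init_blocks() {}  // phase 1: create child blocks
        virtual void init_params() {}  // phase 2: create this block's own tensors
        void init() {
            init_blocks();  // children first, so init_params can rely on them
            init_params();
        }
    };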

@@ -1645,9 +1645,7 @@
         } else {
             latent = gaussian_latent_sample(work_ctx, vae_output);
         }
-        if (!use_tiny_autoencoder) {
-            process_latent_in(latent);
-        }
+        process_latent_in(latent);
         if (sd_version_is_qwen_image(version)) {
             latent = ggml_reshape_4d(work_ctx, latent, latent->ne[0], latent->ne[1], latent->ne[3], 1);
         }
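
With the guard gone, process_latent_in now also runs on the tiny-autoencoder (TAESD) path. In SD-style pipelines this step maps raw VAE latents into the range the diffusion model expects; the sketch below assumes it is a simple per-element scale (the real implementation may also apply a shift, and 0.18215f is the well-known SD1 scale factor, used here only as an example):

    #include "ggml.h"

    // Rough sketch: scale a contiguous float32 latent in place by the
    // model-specific factor, e.g. 0.18215f for SD1.
    static void process_latent_in_sketch(struct ggml_tensor* latent, float scale_factor) {
        float* data     = (float*)latent->data;
        const int64_t n = ggml_nelements(latent);
        for (int64_t i = 0; i < n; i++) {
            data[i] *= scale_factor;
        }
    }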

vae.hpp (19 deletions)

@@ -66,25 +66,6 @@ protected:
     int64_t in_channels;
     bool use_linear;
 
-    void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") {
-        auto iter = tensor_storage_map.find(prefix + "proj_out.weight");
-        if (iter != tensor_storage_map.end()) {
-            if (iter->second.n_dims == 4 && use_linear) {
-                use_linear         = false;
-                blocks["q"]        = std::make_shared<Conv2d>(in_channels, in_channels, std::pair{1, 1});
-                blocks["k"]        = std::make_shared<Conv2d>(in_channels, in_channels, std::pair{1, 1});
-                blocks["v"]        = std::make_shared<Conv2d>(in_channels, in_channels, std::pair{1, 1});
-                blocks["proj_out"] = std::make_shared<Conv2d>(in_channels, in_channels, std::pair{1, 1});
-            } else if (iter->second.n_dims == 2 && !use_linear) {
-                use_linear         = true;
-                blocks["q"]        = std::make_shared<Linear>(in_channels, in_channels);
-                blocks["k"]        = std::make_shared<Linear>(in_channels, in_channels);
-                blocks["v"]        = std::make_shared<Linear>(in_channels, in_channels);
-                blocks["proj_out"] = std::make_shared<Linear>(in_channels, in_channels);
-            }
-        }
-    }
-
 public:
     AttnBlock(int64_t in_channels, bool use_linear)
         : in_channels(in_channels), use_linear(use_linear) {
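
The deleted AttnBlock override is the same Conv2d-vs-Linear switch as in SpatialTransformer, applied to q, k, v, and proj_out. The two layouts are interchangeable because a 1x1 convolution is exactly a per-pixel linear layer; the toy program below shows that at a single spatial position both reduce to the same matrix-vector product:

    #include <cstdio>
    #include <vector>

    // At one pixel, conv1x1 and linear both compute y[o] = sum_i w[o][i] * x[i] (+ bias);
    // only the stored weight shape differs: [OC, IC, 1, 1] vs [OC, IC].
    int main() {
        const int IC = 3, OC = 2;
        std::vector<float> w = {0.5f, -1.0f, 2.0f,    // weights for output channel 0
                                1.5f,  0.0f, -0.5f};  // weights for output channel 1
        std::vector<float> x = {1.0f, 2.0f, 3.0f};    // one pixel's channel vector
        for (int o = 0; o < OC; o++) {
            float y = 0.0f;
            for (int i = 0; i < IC; i++)
                y += w[o * IC + i] * x[i];
            std::printf("y[%d] = %g\n", o, y);
        }
        return 0;
    }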