fix: make the immediate LoRA apply mode work better when using Vulkan (#1021)

Author: leejet, 2025-11-30 12:08:25 +08:00 (committed by GitHub)
Parent: ab7e8d285e
Commit: bc80225336


@@ -1141,6 +1141,14 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones(struct ggml_context* ctx,
}
__STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32(ggml_context* ctx, ggml_tensor* a) {
#ifdef SD_USE_VULKAN
    auto zero_index = ggml_get_tensor(ctx, "ggml_runner_build_in_tensor:zero_int");
    auto out = ggml_reshape_1d(ctx, a, ggml_nelements(a));
    out = ggml_get_rows(ctx, out, zero_index);
    out = ggml_reshape(ctx, out, a);
    // auto out = ggml_cast(ctx, a, GGML_TYPE_F32);
    return out;
#else
    auto out = ggml_reshape_2d(ctx, a, 1, ggml_nelements(a));
    ggml_tensor* one = ggml_ext_ones(ctx, 1, 1, 1, 1); // [1,]
    if (ggml_is_transposed(out)) {
@@ -1149,6 +1157,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32(ggml_context* ctx, ggml_tensor*
        out = ggml_mul_mat(ctx, out, one);
    }
    out = ggml_reshape(ctx, out, a);
#endif
    return out;
}
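With this change, the Vulkan build converts a tensor to F32 through ggml_get_rows instead of the mul_mat-by-ones path that remains in the #else branch (a direct ggml_cast is left commented out): the input is flattened to a single row, row 0 is gathered with a one-element I32 index tensor registered under the name ggml_runner_build_in_tensor:zero_int, and the F32 output of ggml_get_rows is reshaped back to the original layout. Below is a minimal standalone sketch of the same trick, not the runner's code; it assumes a ggml context created with no_alloc = false so the index tensor owns host data, whereas the runner looks up the pre-registered named tensor and binds its data later.

#include "ggml.h"

// Sketch only: promote `a` to F32 via ggml_get_rows.
// ggml_get_rows returns an F32 result for non-I32 sources, so gathering
// "row 0" of the flattened tensor copies every element and converts it.
static ggml_tensor* cast_f32_via_get_rows(ggml_context* ctx, ggml_tensor* a) {
    ggml_tensor* zero_index = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
    ggml_set_i32(zero_index, 0);                                     // index of the only row
    ggml_tensor* flat = ggml_reshape_1d(ctx, a, ggml_nelements(a));  // flatten to one row
    ggml_tensor* f32  = ggml_get_rows(ctx, flat, zero_index);        // gather + convert to F32
    return ggml_reshape(ctx, f32, a);                                // restore a's shape
}

The commit message only says the immediate LoRA apply mode should work better with Vulkan, so whether the gain comes from kernel coverage or from the speed of ggml_get_rows versus ggml_cast / ggml_mul_mat on that backend is not stated in the diff.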
@@ -1556,6 +1565,9 @@ protected:
    std::vector<float> one_vec = {1.f};
    ggml_tensor* one_tensor = nullptr;
    std::vector<int> zero_int_vec = {0};
    ggml_tensor* zero_int_tensor = nullptr;
    std::map<struct ggml_tensor*, const void*> backend_tensor_data_map;
    std::map<std::string, struct ggml_tensor*> cache_tensor_map; // name -> tensor
    const std::string final_result_name = "ggml_runner_final_result_tensor";
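The new zero_int_vec / zero_int_tensor pair mirrors the existing one_vec / one_tensor pair: the std::vector keeps the host-side value alive for the lifetime of the runner, and backend_tensor_data_map records which graph tensor each host pointer belongs to so the bytes can be uploaded once the graph has backend buffers. A simplified sketch of that record-now, copy-later idea follows; the helper type and its methods are illustrative and are not the runner's set_backend_tensor_data implementation, which lies outside this hunk.

#include <map>
#include "ggml.h"
#include "ggml-backend.h"

// Illustrative helper (not part of the runner): remember host pointers while
// the graph is built, then copy the bytes into backend buffers afterwards.
struct host_data_binder {
    std::map<ggml_tensor*, const void*> pending;

    void record(ggml_tensor* t, const void* host_ptr) {
        pending[t] = host_ptr;  // the tensor has no backend buffer yet
    }

    // call after the graph allocator has assigned buffers to the tensors
    void flush() {
        for (auto& kv : pending) {
            ggml_backend_tensor_set(kv.first, kv.second, 0, ggml_nbytes(kv.first));
        }
        pending.clear();
    }
};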
@@ -1626,10 +1638,15 @@ protected:
        one_tensor = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, 1);
        ggml_set_name(one_tensor, "ggml_runner_build_in_tensor:one");
        set_backend_tensor_data(one_tensor, one_vec.data());

        zero_int_tensor = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_I32, 1);
        ggml_set_name(zero_int_tensor, "ggml_runner_build_in_tensor:zero_int");
        set_backend_tensor_data(zero_int_tensor, zero_int_vec.data());
    }

    void prepare_build_in_tensor_after(struct ggml_cgraph* gf) {
        ggml_build_forward_expand(gf, one_tensor);
        ggml_build_forward_expand(gf, zero_int_tensor);
    }

    struct ggml_cgraph* new_graph_custom(size_t graph_size) {
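The runner wires the new index tensor up exactly like one_tensor: prepare_build_in_tensor_before creates a one-element GGML_TYPE_I32 tensor in the compute context, names it ggml_runner_build_in_tensor:zero_int, and binds it to zero_int_vec, and prepare_build_in_tensor_after expands it into the forward graph so the allocator keeps it; ggml_ext_cast_f32 then recovers it by name with ggml_get_tensor on the same context. A condensed sketch of that name-based handshake, with illustrative function names and the host-data binding left out:

#include "ggml.h"

static const char* k_zero_int_name = "ggml_runner_build_in_tensor:zero_int";

// graph-build time: create, name, and pin the shared index tensor
static ggml_tensor* register_zero_index(ggml_context* ctx, ggml_cgraph* gf) {
    ggml_tensor* zero_int = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1);
    ggml_set_name(zero_int, k_zero_int_name);  // key used for the later lookup
    ggml_build_forward_expand(gf, zero_int);   // keep the leaf alive in the graph
    return zero_int;                           // its data (the value 0) is bound elsewhere
}

// op-build time: any helper that only sees the context can recover it by name
static ggml_tensor* lookup_zero_index(ggml_context* ctx) {
    return ggml_get_tensor(ctx, k_zero_int_name);
}

Passing the index by name rather than as an argument lets ggml_ext_cast_f32 keep its two-parameter signature; the trade-off is that the Vulkan branch now silently depends on prepare_build_in_tensor_before having run for the current graph.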