update docs

to_out.0 precision fix
2026-06-25 15:46:40 +00:00 · 2025-10-13 23:13:12 +08:00 · 2025-10-13 23:02:24 +08:00
5 changed files with 41 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -24,6 +24,7 @@ API and command-line option may change frequently.***
    - [Qwen Image](./docs/qwen_image.md)
  - Image Edit Models
    - [FLUX.1-Kontext-dev](./docs/kontext.md)
+    - [Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
  - Video Models
    - [Wan2.1/Wan2.2](./docs/wan.md)
  - [PhotoMaker](https://github.com/TencentARC/PhotoMaker) support.
@ -298,6 +299,7 @@ arguments:
  --clip_vision                      path to the clip-vision encoder
  --t5xxl                            path to the t5xxl text encoder
  --qwen2vl                          path to the qwen2vl text encoder
+  --qwen2vl_vision                   path to the qwen2vl vision encoder (ViT)
  --vae [VAE]                        path to vae
  --taesd [TAESD_PATH]               path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
  --control-net [CONTROL_PATH]       path to control net model
--- a/assets/qwen/qwen_image_edit.png
+++ b/assets/qwen/qwen_image_edit.png
--- a/assets/qwen/qwen_image_edit_2509.png
+++ b/assets/qwen/qwen_image_edit_2509.png
--- a/docs/qwen_image_edit.md
+++ b/docs/qwen_image_edit.md
@ -0,0 +1,35 @@
+# How to Use
+
+## Download weights
+
+- Download Qwen Image Edit
+    - Qwen Image Edit
+        - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
+        - gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-GGUF/tree/main
+    - Qwen Image Edit 2509
+        - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
+        - gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main
+- Download vae
+    - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae
+- Download qwen_2.5_vl 7b
+    - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders
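+    - gguf: https://huggingface.co/mradermacher/Qwen2.5-VL-7B-Instruct-GGUF/tree/main (when using the gguf text encoder, also download the matching `mmproj` file and pass it via `--qwen2vl_vision`, as in the Qwen Image Edit 2509 example below)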
+
+## Examples
+
+### Qwen Image Edit
+
+```
+.\bin\Release\sd.exe --diffusion-model  ..\..\ComfyUI\models\diffusion_models\Qwen_Image_Edit-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors  --qwen2vl ..\..\ComfyUI\models\text_encoders\qwen_2.5_vl_7b.safetensors --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'edit.cpp'" --seed 1118877715456453
+```
+
+<img alt="qwen_image_edit" src="../assets/qwen/qwen_image_edit.png" />
+
+
+### Qwen Image Edit 2509
+
+```
+.\bin\Release\sd.exe --diffusion-model  ..\..\ComfyUI\models\diffusion_models\Qwen-Image-Edit-2509-Q4_K_S.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors  --qwen2vl ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf --qwen2vl_vision ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct.mmproj-Q8_0.gguf --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'Qwen Image Edit 2509'"
+```
+
+<img alt="qwen_image_edit_2509" src="../assets/qwen/qwen_image_edit_2509.png" />
--- a/qwen_image.hpp
+++ b/qwen_image.hpp
@ -94,12 +94,12 @@ namespace Qwen {
            blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
            blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));

-            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias));
-            // to_out.1 is nn.Dropout
-
            float scale = 1.f / 32.f;
            // The scale is applied here to prevent NaN issues in certain situations,
            // for example when running on CUDA with k-quant weights (this affects some prompts, not all).
+            blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, false, scale));
+            // to_out.1 is nn.Dropout
+
            blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
        }

@ -159,7 +159,7 @@ namespace Qwen {
            auto k = ggml_concat(ctx, txt_k, img_k, 2);  // [N, n_txt_token + n_img_token, n_head, d_head]
            auto v = ggml_concat(ctx, txt_v, img_v, 2);  // [N, n_txt_token + n_img_token, n_head, d_head]

-            auto attn         = Rope::attention(ctx, backend, q, k, v, pe, mask, flash_attn, (1.0f / 256.f));  // [N, n_txt_token + n_img_token, n_head*d_head]
+            auto attn         = Rope::attention(ctx, backend, q, k, v, pe, mask, flash_attn, (1.0f / 128.f));  // [N, n_txt_token + n_img_token, n_head*d_head]
            attn              = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3));                           // [n_txt_token + n_img_token, N, hidden_size]
            auto txt_attn_out = ggml_view_3d(ctx,
                                             attn,
Author	SHA1	Message	Date
leejet	c47affcd59	update docs	2025-10-13 23:13:12 +08:00
leejet	4edc3ad2ad	to_out.0 precision fix	2025-10-13 23:02:24 +08:00