mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
Compare commits
2 Commits
162d5cef64
...
c47affcd59
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c47affcd59 | ||
|
|
4edc3ad2ad |
@ -24,6 +24,7 @@ API and command-line option may change frequently.***
|
||||
- [Qwen Image](./docs/qwen_image.md)
|
||||
- Image Edit Models
|
||||
- [FLUX.1-Kontext-dev](./docs/kontext.md)
|
||||
- [Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
|
||||
- Video Models
|
||||
- [Wan2.1/Wan2.2](./docs/wan.md)
|
||||
- [PhotoMaker](https://github.com/TencentARC/PhotoMaker) support.
|
||||
@ -298,6 +299,7 @@ arguments:
|
||||
--clip_vision path to the clip-vision encoder
|
||||
--t5xxl path to the t5xxl text encoder
|
||||
--qwen2vl path to the qwen2vl text encoder
|
||||
--qwen2vl_vision path to the qwen2vl vit
|
||||
--vae [VAE] path to vae
|
||||
--taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
|
||||
--control-net [CONTROL_PATH] path to control net model
|
||||
|
||||
BIN
assets/qwen/qwen_image_edit.png
Normal file
BIN
assets/qwen/qwen_image_edit.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 457 KiB |
BIN
assets/qwen/qwen_image_edit_2509.png
Normal file
BIN
assets/qwen/qwen_image_edit_2509.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 415 KiB |
35
docs/qwen_image_edit.md
Normal file
35
docs/qwen_image_edit.md
Normal file
@ -0,0 +1,35 @@
|
||||
# How to Use
|
||||
|
||||
## Download weights
|
||||
|
||||
- Download the Qwen Image Edit diffusion model
|
||||
- Qwen Image Edit
|
||||
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
|
||||
- gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-GGUF/tree/main
|
||||
- Qwen Image Edit 2509
|
||||
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
|
||||
- gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main
|
||||
- Download vae
|
||||
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae
|
||||
- Download the Qwen2.5-VL 7B text encoder
|
||||
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders
|
||||
- gguf: https://huggingface.co/mradermacher/Qwen2.5-VL-7B-Instruct-GGUF/tree/main
|
||||
|
||||
## Examples
|
||||
|
||||
### Qwen Image Edit
|
||||
|
||||
```
|
||||
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Qwen_Image_Edit-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --qwen2vl ..\..\ComfyUI\models\text_encoders\qwen_2.5_vl_7b.safetensors --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'edit.cpp'" --seed 1118877715456453
|
||||
```
|
||||
|
||||
<img alt="qwen_image_edit" src="../assets/qwen/qwen_image_edit.png" />
|
||||
|
||||
|
||||
### Qwen Image Edit 2509
|
||||
|
||||
```
|
||||
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Qwen-Image-Edit-2509-Q4_K_S.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --qwen2vl ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf --qwen2vl_vision ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct.mmproj-Q8_0.gguf --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'Qwen Image Edit 2509'"
|
||||
```
|
||||
|
||||
<img alt="qwen_image_edit_2509" src="../assets/qwen/qwen_image_edit_2509.png" />
|
||||
@ -94,12 +94,12 @@ namespace Qwen {
|
||||
blocks["norm_added_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
|
||||
blocks["norm_added_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(dim_head, eps));
|
||||
|
||||
blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias));
|
||||
// to_out.1 is nn.Dropout
|
||||
|
||||
float scale = 1.f / 32.f;
|
||||
// The purpose of the scale here is to prevent NaN issues in certain situations.
|
||||
// For example, when running on CUDA with k-quant weights (it only occurs with some prompts).
|
||||
blocks["to_out.0"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_dim, out_bias, false, false, scale));
|
||||
// to_out.1 is nn.Dropout
|
||||
|
||||
blocks["to_add_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, out_context_dim, out_bias, false, false, scale));
|
||||
}
|
||||
|
||||
@ -159,7 +159,7 @@ namespace Qwen {
|
||||
auto k = ggml_concat(ctx, txt_k, img_k, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||
auto v = ggml_concat(ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||
|
||||
auto attn = Rope::attention(ctx, backend, q, k, v, pe, mask, flash_attn, (1.0f / 256.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||
auto attn = Rope::attention(ctx, backend, q, k, v, pe, mask, flash_attn, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||
attn = ggml_cont(ctx, ggml_permute(ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
|
||||
auto txt_attn_out = ggml_view_3d(ctx,
|
||||
attn,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user