diff --git a/README.md b/README.md index a5c86b0..afa0ec3 100644 --- a/README.md +++ b/README.md @@ -313,6 +313,9 @@ arguments: -i, --end-img [IMAGE] path to the end image, required by flf2v --control-image [IMAGE] path to image condition, control net -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) + --control-video [PATH] path to control video frames, It must be a directory path. + The video frames inside should be stored as images in lexicographical (character) order + For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc. --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1). -o, --output OUTPUT path to write result image to (default: ./output.png) -p, --prompt [PROMPT] the prompt to render @@ -379,6 +382,7 @@ arguments: --moe-boundary BOUNDARY timestep boundary for Wan2.2 MoE model. (default: 0.875) only enabled if `--high-noise-steps` is set to -1 --flow-shift SHIFT shift value for Flow models like SD3.x or WAN (default: auto) + --vace-strength wan vace strength -v, --verbose print extra info ``` diff --git a/assets/wan/Wan2.1_1.3B_vace_r2v.mp4 b/assets/wan/Wan2.1_1.3B_vace_r2v.mp4 new file mode 100644 index 0000000..05f6cfa Binary files /dev/null and b/assets/wan/Wan2.1_1.3B_vace_r2v.mp4 differ diff --git a/assets/wan/Wan2.1_1.3B_vace_t2v.mp4 b/assets/wan/Wan2.1_1.3B_vace_t2v.mp4 new file mode 100644 index 0000000..73862e8 Binary files /dev/null and b/assets/wan/Wan2.1_1.3B_vace_t2v.mp4 differ diff --git a/assets/wan/Wan2.1_1.3B_vace_v2v.mp4 b/assets/wan/Wan2.1_1.3B_vace_v2v.mp4 new file mode 100644 index 0000000..2cc4c0a Binary files /dev/null and b/assets/wan/Wan2.1_1.3B_vace_v2v.mp4 differ diff --git a/assets/wan/Wan2.1_14B_vace_r2v.mp4 b/assets/wan/Wan2.1_14B_vace_r2v.mp4 new file mode 100644 index 0000000..686371f Binary files /dev/null and b/assets/wan/Wan2.1_14B_vace_r2v.mp4 
differ diff --git a/assets/wan/Wan2.1_14B_vace_t2v.mp4 b/assets/wan/Wan2.1_14B_vace_t2v.mp4 new file mode 100644 index 0000000..cebe8f9 Binary files /dev/null and b/assets/wan/Wan2.1_14B_vace_t2v.mp4 differ diff --git a/assets/wan/Wan2.1_14B_vace_v2v.mp4 b/assets/wan/Wan2.1_14B_vace_v2v.mp4 new file mode 100644 index 0000000..95f30d4 Binary files /dev/null and b/assets/wan/Wan2.1_14B_vace_v2v.mp4 differ diff --git a/docs/wan.md b/docs/wan.md index a3e5d69..5bde71c 100644 --- a/docs/wan.md +++ b/docs/wan.md @@ -18,6 +18,12 @@ - Wan2.1 FLF2V 14B 720P - safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/diffusion_models - gguf: https://huggingface.co/city96/Wan2.1-FLF2V-14B-720P-gguf/tree/main + - Wan2.1 VACE 1.3B + - safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/diffusion_models + - gguf: https://huggingface.co/calcuis/wan-1.3b-gguf/tree/main + - Wan2.1 VACE 14B + - safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/tree/main/split_files/diffusion_models + - gguf: https://huggingface.co/QuantStack/Wan2.1_14B_VACE-GGUF/tree/main - Wan2.2 - Wan2.2 TI2V 5B - safetensors: https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/tree/main/split_files/diffusion_models @@ -137,3 +143,62 @@ ``` + +### Wan2.1 VACE 1.3B + +#### T2V + +``` +.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 1 --offload-to-cpu +``` + + + + +#### R2V + +``` +.\bin\Release\sd.exe -M vid_gen --diffusion-model 
..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa -i ..\assets\cat_with_sd_cpp_42.png --video-frames 33 --offload-to-cpu +``` + + + + +#### V2V + +``` +mkdir post+depth +ffmpeg -i ..\..\ComfyUI\input\post+depth.mp4 -qscale:v 1 -vf fps=8 post+depth\frame_%04d.jpg +.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "The girl is dancing in a sea of flowers, slowly moving her hands. There is a close - up shot of her upper body. The character is surrounded by other transparent glass flowers in the style of Nicoletta Ceccoli, creating a beautiful, surreal, and emotionally expressive movie scene with a white. transparent feel and a dreamyl atmosphere." 
--cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa -i ..\..\ComfyUI\input\dance_girl.jpg --control-video ./post+depth --video-frames 33 --offload-to-cpu +``` + + + +### Wan2.1 VACE 14B + +#### T2V + +``` +.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 33 --offload-to-cpu +``` + + + + +#### R2V + +``` +.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa -i ..\assets\cat_with_sd_cpp_42.png --video-frames 33 --offload-to-cpu +``` + + + + + +#### V2V + +``` +.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "The girl is dancing in a sea of flowers, slowly moving her hands. There is a close - up shot of her upper body. 
The character is surrounded by other transparent glass flowers in the style of Nicoletta Ceccoli, creating a beautiful, surreal, and emotionally expressive movie scene with a white. transparent feel and a dreamyl atmosphere." --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa -i ..\..\ComfyUI\input\dance_girl.jpg --control-video ./post+depth --video-frames 33 --offload-to-cpu +``` + + diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 35f2d97..af03c15 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -231,7 +231,7 @@ void print_usage(int argc, const char* argv[]) { printf(" -i, --end-img [IMAGE] path to the end image, required by flf2v\n"); printf(" --control-image [IMAGE] path to image condition, control net\n"); printf(" -r, --ref-image [PATH] reference image for Flux Kontext models (can be used multiple times) \n"); - printf(" --control-video [PATH] path to control video frames, It must be a directory path."); + printf(" --control-video [PATH] path to control video frames, It must be a directory path.\n"); printf(" The video frames inside should be stored as images in lexicographical (character) order\n"); printf(" For example, if the control video path is `frames`, the directory contain images such as 00.png, 01.png, … etc.\n"); printf(" --increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).\n");