Compare commits
166 Commits
master-371
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
545fac4f3f | ||
|
|
5265a5efa1 | ||
|
|
84cbd88df1 | ||
|
|
997bb11fb6 | ||
|
|
862a6586cb | ||
|
|
61d8331ef3 | ||
|
|
acc3bf1fdc | ||
|
|
83eabd7c01 | ||
|
|
630ee03f23 | ||
|
|
f6968bc589 | ||
|
|
adfef62900 | ||
|
|
6fa7ca9317 | ||
|
|
d6dd6d7b55 | ||
|
|
dea4980f4e | ||
|
|
c8fb3d2458 | ||
|
|
3d33caaef8 | ||
|
|
9b424db0f4 | ||
|
|
d95062737e | ||
|
|
7c880f80c7 | ||
|
|
aaa8a51bd8 | ||
|
|
ba35dd734e | ||
|
|
d41f5fff69 | ||
|
|
810ef0cf76 | ||
|
|
5792c66879 | ||
|
|
39d54702a6 | ||
|
|
60889bc9a1 | ||
|
|
e64baa3611 | ||
|
|
cec4aedcfd | ||
|
|
4cdfff5ff2 | ||
|
|
0752cc9d3a | ||
|
|
b314d80ad0 | ||
|
|
c9cd49701a | ||
|
|
c5eb1e4137 | ||
|
|
636d3cb6ff | ||
|
|
adea272225 | ||
|
|
45ce78a3ae | ||
|
|
28ef93c0e1 | ||
|
|
3296545090 | ||
|
|
d60fb27560 | ||
|
|
c7ccafbd6f | ||
|
|
aa0b899397 | ||
|
|
5e264372ce | ||
|
|
f0f641a142 | ||
|
|
9f56833e14 | ||
|
|
65891d74cc | ||
|
|
f957fa3d2a | ||
|
|
c252e03c6b | ||
|
|
e63daba33d | ||
|
|
3959109281 | ||
|
|
e411520407 | ||
|
|
43e829f219 | ||
|
|
7837232631 | ||
|
|
4ccce027b2 | ||
|
|
fa61ea744d | ||
|
|
5e4579c11d | ||
|
|
329571131d | ||
|
|
a48b4a3ade | ||
|
|
b87fe13afd | ||
|
|
e50e1f253d | ||
|
|
c6206fb351 | ||
|
|
639091fbe9 | ||
|
|
9293016c9d | ||
|
|
2efd19978d | ||
|
|
61659ef299 | ||
|
|
9565c7f6bd | ||
|
|
fbce16e02d | ||
|
|
7010bb4dff | ||
|
|
48d3161a8d | ||
|
|
271b594e74 | ||
|
|
885e62ea82 | ||
|
|
0e52afc651 | ||
|
|
27b5f17401 | ||
|
|
dfe6d6c664 | ||
|
|
9be0b91927 | ||
|
|
e7e83ed4d1 | ||
|
|
c5602a676c | ||
|
|
c34730d9b4 | ||
|
|
fdcacc1ebb | ||
|
|
496ec9421e | ||
|
|
05006cd6e1 | ||
|
|
b90b1ee9cf | ||
|
|
2cef4badb8 | ||
|
|
a119a4da9a | ||
|
|
6eefd2d49a | ||
|
|
4ff2c8c74b | ||
|
|
51bd9c8004 | ||
|
|
d0d836ae74 | ||
|
|
a2d83dd0c8 | ||
|
|
cc107714d7 | ||
|
|
37c9860b79 | ||
|
|
ccb6b0ac9d | ||
|
|
df4efe26bd | ||
|
|
860a78e248 | ||
|
|
a0adcfb148 | ||
|
|
3d5fdd7b37 | ||
|
|
3e6c428c27 | ||
|
|
96fcb13fc0 | ||
|
|
3e812460cf | ||
|
|
98916e8256 | ||
|
|
298b11069f | ||
|
|
30a91138f8 | ||
|
|
c6937ba44a | ||
|
|
ca5b1969a8 | ||
|
|
50ff966445 | ||
|
|
88ec9d30b1 | ||
|
|
60abda56e0 | ||
|
|
23fce0bd84 | ||
|
|
7c88c4765c | ||
|
|
1f77545cf8 | ||
|
|
8e9f3a4d9e | ||
|
|
78e15bd4af | ||
|
|
97cf2efe45 | ||
|
|
bda7fab9f2 | ||
|
|
c2e18c86e8 | ||
|
|
c3ad6a13e1 | ||
|
|
ebe9d26a72 | ||
|
|
9fa7f415df | ||
|
|
a23262dfde | ||
|
|
e687913bf1 | ||
|
|
200cb6f2ca | ||
|
|
43a70e819b | ||
|
|
614f8736df | ||
|
|
d96b4152d6 | ||
|
|
8f05f5bc6e | ||
|
|
15d0f82760 | ||
|
|
6888fcb581 | ||
|
|
2aecdd57ca | ||
|
|
11ab095230 | ||
|
|
a3a88fc9b2 | ||
|
|
8823dc48bc | ||
|
|
1ac5a616de | ||
|
|
d939f6e86a | ||
|
|
e72aea796e | ||
|
|
a908436729 | ||
|
|
583a02e29e | ||
|
|
96c3e64057 | ||
|
|
0392273e10 | ||
|
|
bf1a388b44 | ||
|
|
c9005337a8 | ||
|
|
2f0bd31a84 | ||
|
|
bfbb929790 | ||
|
|
689e44c9a8 | ||
|
|
985aedda32 | ||
|
|
3f3610b5cd | ||
|
|
118683de8a | ||
|
|
bcc9c0d0b3 | ||
|
|
5865b5e703 | ||
|
|
edf2cb3846 | ||
|
|
99e17232a4 | ||
|
|
710169df5c | ||
|
|
e4c50f1de5 | ||
|
|
0743a1b3b5 | ||
|
|
34a6fd4e60 | ||
|
|
3c1187ce83 | ||
|
|
20eb674100 | ||
|
|
bc80225336 | ||
|
|
ab7e8d285e | ||
|
|
673dbdda17 | ||
|
|
0249509a30 | ||
|
|
52b67c538b | ||
|
|
20345888a3 | ||
|
|
490c51d963 | ||
|
|
45c46779af | ||
|
|
869d023416 | ||
|
|
e9bc3b6c06 | ||
|
|
b542894fb9 |
@ -1,4 +1,5 @@
|
|||||||
build*/
|
build*/
|
||||||
|
docs/
|
||||||
test/
|
test/
|
||||||
|
|
||||||
.cache/
|
.cache/
|
||||||
|
|||||||
365
.github/workflows/build.yml
vendored
@ -21,11 +21,13 @@ on:
|
|||||||
"**/*.c",
|
"**/*.c",
|
||||||
"**/*.cpp",
|
"**/*.cpp",
|
||||||
"**/*.cu",
|
"**/*.cu",
|
||||||
|
"examples/server/frontend/**",
|
||||||
]
|
]
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
paths:
|
paths:
|
||||||
[
|
[
|
||||||
|
".github/workflows/**",
|
||||||
"**/CMakeLists.txt",
|
"**/CMakeLists.txt",
|
||||||
"**/Makefile",
|
"**/Makefile",
|
||||||
"**/*.h",
|
"**/*.h",
|
||||||
@ -33,11 +35,16 @@ on:
|
|||||||
"**/*.c",
|
"**/*.c",
|
||||||
"**/*.cpp",
|
"**/*.cpp",
|
||||||
"**/*.cu",
|
"**/*.cu",
|
||||||
|
"examples/server/frontend/**",
|
||||||
]
|
]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ubuntu-latest-cmake:
|
ubuntu-latest-cmake:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@ -49,6 +56,16 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
id: depends
|
id: depends
|
||||||
run: |
|
run: |
|
||||||
@ -66,7 +83,7 @@ jobs:
|
|||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
uses: pr-mpt/actions-commit-hash@v2
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
- name: Fetch system info
|
- name: Fetch system info
|
||||||
id: system-info
|
id: system-info
|
||||||
@ -92,6 +109,143 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
|
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
|
||||||
|
|
||||||
|
ubuntu-latest-cmake-vulkan:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install build-essential libvulkan-dev glslc
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DSD_BUILD_SHARED_LIBS=ON -DSD_VULKAN=ON
|
||||||
|
cmake --build . --config Release
|
||||||
|
|
||||||
|
- name: Get commit hash
|
||||||
|
id: commit
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
|
- name: Fetch system info
|
||||||
|
id: system-info
|
||||||
|
run: |
|
||||||
|
echo "CPU_ARCH=`uname -m`" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "OS_NAME=`lsb_release -s -i`" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "OS_VERSION=`lsb_release -s -r`" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "OS_TYPE=`uname -s`" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Pack artifacts
|
||||||
|
id: pack_artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
run: |
|
||||||
|
cp ggml/LICENSE ./build/bin/ggml.txt
|
||||||
|
cp LICENSE ./build/bin/stable-diffusion.cpp.txt
|
||||||
|
zip -j sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-vulkan.zip ./build/bin/*
|
||||||
|
|
||||||
|
- name: Upload artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-vulkan.zip
|
||||||
|
path: |
|
||||||
|
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-vulkan.zip
|
||||||
|
|
||||||
|
build-and-push-docker-images:
|
||||||
|
name: Build and push container images
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
id-token: write
|
||||||
|
attestations: write
|
||||||
|
artifact-metadata: write
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
variant: [musa, sycl, vulkan, cuda]
|
||||||
|
|
||||||
|
env:
|
||||||
|
REGISTRY: ghcr.io
|
||||||
|
IMAGE_NAME: ${{ github.repository }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
|
- name: Get commit hash
|
||||||
|
id: commit
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Log in to the container registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ${{ env.REGISTRY }}
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Extract metadata for Docker
|
||||||
|
id: meta
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
with:
|
||||||
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||||
|
|
||||||
|
- name: Free Disk Space (Ubuntu)
|
||||||
|
uses: jlumbroso/free-disk-space@v1.3.1
|
||||||
|
with:
|
||||||
|
# this might remove tools that are actually needed,
|
||||||
|
# if set to "true" but frees about 6 GB
|
||||||
|
tool-cache: false
|
||||||
|
|
||||||
|
- name: Build and push Docker image
|
||||||
|
id: build-push
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
platforms: linux/amd64
|
||||||
|
push: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
file: Dockerfile.${{ matrix.variant }}
|
||||||
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.BRANCH_NAME }}-${{ matrix.variant }}
|
||||||
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
annotations: ${{ steps.meta.outputs.annotations }}
|
||||||
|
|
||||||
macOS-latest-cmake:
|
macOS-latest-cmake:
|
||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
|
|
||||||
@ -102,6 +256,16 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
id: depends
|
id: depends
|
||||||
run: |
|
run: |
|
||||||
@ -119,7 +283,7 @@ jobs:
|
|||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
uses: pr-mpt/actions-commit-hash@v2
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
- name: Fetch system info
|
- name: Fetch system info
|
||||||
id: system-info
|
id: system-info
|
||||||
@ -146,7 +310,7 @@ jobs:
|
|||||||
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
|
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
|
||||||
|
|
||||||
windows-latest-cmake:
|
windows-latest-cmake:
|
||||||
runs-on: windows-2025
|
runs-on: windows-2022
|
||||||
|
|
||||||
env:
|
env:
|
||||||
VULKAN_VERSION: 1.4.328.1
|
VULKAN_VERSION: 1.4.328.1
|
||||||
@ -163,8 +327,8 @@ jobs:
|
|||||||
- build: "avx512"
|
- build: "avx512"
|
||||||
defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
|
defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
|
||||||
- build: "cuda12"
|
- build: "cuda12"
|
||||||
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;86;80;75"
|
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES='61;70;75;80;86;89;90;100;120' -DCMAKE_CUDA_FLAGS='-Xcudafe \"--diag_suppress=177\" -Xcudafe \"--diag_suppress=550\"'"
|
||||||
- build: 'vulkan'
|
- build: "vulkan"
|
||||||
defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON"
|
defines: "-DSD_VULKAN=ON -DSD_BUILD_SHARED_LIBS=ON"
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
@ -173,12 +337,22 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
- name: Install cuda-toolkit
|
- name: Install cuda-toolkit
|
||||||
id: cuda-toolkit
|
id: cuda-toolkit
|
||||||
if: ${{ matrix.build == 'cuda12' }}
|
if: ${{ matrix.build == 'cuda12' }}
|
||||||
uses: Jimver/cuda-toolkit@v0.2.19
|
uses: Jimver/cuda-toolkit@v0.2.22
|
||||||
with:
|
with:
|
||||||
cuda: "12.6.2"
|
cuda: "12.8.1"
|
||||||
method: "network"
|
method: "network"
|
||||||
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
|
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
|
||||||
|
|
||||||
@ -191,13 +365,17 @@ jobs:
|
|||||||
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
|
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
|
||||||
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
|
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
|
||||||
|
|
||||||
|
- name: Activate MSVC environment
|
||||||
|
id: msvc_dev_cmd
|
||||||
|
uses: ilammy/msvc-dev-cmd@v1
|
||||||
|
|
||||||
- name: Build
|
- name: Build
|
||||||
id: cmake_build
|
id: cmake_build
|
||||||
run: |
|
run: |
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. ${{ matrix.defines }}
|
cmake .. -DCMAKE_CXX_FLAGS='/bigobj' -G Ninja -DCMAKE_C_COMPILER=cl.exe -DCMAKE_CXX_COMPILER=cl.exe -DCMAKE_BUILD_TYPE=Release ${{ matrix.defines }}
|
||||||
cmake --build . --config Release
|
cmake --build .
|
||||||
|
|
||||||
- name: Check AVX512F support
|
- name: Check AVX512F support
|
||||||
id: check_avx512f
|
id: check_avx512f
|
||||||
@ -215,7 +393,7 @@ jobs:
|
|||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
uses: pr-mpt/actions-commit-hash@v2
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
- name: Pack artifacts
|
- name: Pack artifacts
|
||||||
id: pack_artifacts
|
id: pack_artifacts
|
||||||
@ -274,6 +452,16 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
- name: Cache ROCm Installation
|
- name: Cache ROCm Installation
|
||||||
id: cache-rocm
|
id: cache-rocm
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
@ -338,7 +526,7 @@ jobs:
|
|||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
uses: pr-mpt/actions-commit-hash@v2
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
- name: Pack artifacts
|
- name: Pack artifacts
|
||||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
@ -360,6 +548,156 @@ jobs:
|
|||||||
path: |
|
path: |
|
||||||
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-rocm-x64.zip
|
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-rocm-x64.zip
|
||||||
|
|
||||||
|
ubuntu-latest-rocm:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
container: rocm/dev-ubuntu-24.04:7.2
|
||||||
|
|
||||||
|
env:
|
||||||
|
ROCM_VERSION: "7.2"
|
||||||
|
UBUNTU_VERSION: "24.04"
|
||||||
|
GPU_TARGETS: "gfx1151;gfx1150;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- run: apt-get update && apt-get install -y git
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
submodules: recursive
|
||||||
|
|
||||||
|
- name: Setup Node
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9
|
||||||
|
|
||||||
|
- name: Free disk space
|
||||||
|
run: |
|
||||||
|
# Remove preinstalled SDKs and caches not needed for this job
|
||||||
|
sudo rm -rf /usr/share/dotnet || true
|
||||||
|
sudo rm -rf /usr/local/lib/android || true
|
||||||
|
sudo rm -rf /opt/ghc || true
|
||||||
|
sudo rm -rf /usr/local/.ghcup || true
|
||||||
|
sudo rm -rf /opt/hostedtoolcache || true
|
||||||
|
|
||||||
|
# Remove old package lists and caches
|
||||||
|
sudo rm -rf /var/lib/apt/lists/* || true
|
||||||
|
sudo apt clean
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt install -y \
|
||||||
|
cmake \
|
||||||
|
hip-dev \
|
||||||
|
hipblas-dev \
|
||||||
|
ninja-build \
|
||||||
|
rocm-dev \
|
||||||
|
zip
|
||||||
|
# Clean apt caches to recover disk space
|
||||||
|
sudo apt clean
|
||||||
|
sudo rm -rf /var/lib/apt/lists/* || true
|
||||||
|
|
||||||
|
- name: Setup ROCm Environment
|
||||||
|
run: |
|
||||||
|
# Add ROCm to PATH for current session
|
||||||
|
echo "/opt/rocm/bin" >> $GITHUB_PATH
|
||||||
|
|
||||||
|
# Build regex pattern from ${{ env.GPU_TARGETS }} (match target as substring)
|
||||||
|
TARGET_REGEX="($(printf '%s' "${{ env.GPU_TARGETS }}" | sed 's/;/|/g'))"
|
||||||
|
|
||||||
|
# Remove library files for architectures we're not building for to save disk space
|
||||||
|
echo "Cleaning up unneeded architecture files..."
|
||||||
|
cd /opt/rocm/lib/rocblas/library
|
||||||
|
# Keep only our target architectures
|
||||||
|
for file in *; do
|
||||||
|
if printf '%s' "$file" | grep -q 'gfx'; then
|
||||||
|
if ! printf '%s' "$file" | grep -Eq "$TARGET_REGEX"; then
|
||||||
|
echo "Removing $file" &&
|
||||||
|
sudo rm -f "$file";
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
cd /opt/rocm/lib/hipblaslt/library
|
||||||
|
for file in *; do
|
||||||
|
if printf '%s' "$file" | grep -q 'gfx'; then
|
||||||
|
if ! printf '%s' "$file" | grep -Eq "$TARGET_REGEX"; then
|
||||||
|
echo "Removing $file" &&
|
||||||
|
sudo rm -f "$file";
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
id: cmake_build
|
||||||
|
run: |
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -G Ninja \
|
||||||
|
-DCMAKE_CXX_COMPILER=amdclang++ \
|
||||||
|
-DCMAKE_C_COMPILER=amdclang \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DSD_HIPBLAS=ON \
|
||||||
|
-DGPU_TARGETS="${{ env.GPU_TARGETS }}" \
|
||||||
|
-DAMDGPU_TARGETS="${{ env.GPU_TARGETS }}" \
|
||||||
|
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
|
||||||
|
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||||
|
-DSD_BUILD_SHARED_LIBS=ON
|
||||||
|
cmake --build . --config Release
|
||||||
|
|
||||||
|
- name: Get commit hash
|
||||||
|
id: commit
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
|
- name: Prepare artifacts
|
||||||
|
id: prepare_artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
run: |
|
||||||
|
# Copy licenses
|
||||||
|
cp ggml/LICENSE ./build/bin/ggml.txt
|
||||||
|
cp LICENSE ./build/bin/stable-diffusion.cpp.txt
|
||||||
|
|
||||||
|
# Move ROCm runtime libraries (to avoid double space consumption)
|
||||||
|
sudo mv /opt/rocm/lib/librocsparse.so* ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/libhsa-runtime64.so* ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/libamdhip64.so* ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/libhipblas.so* ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/libhipblaslt.so* ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/librocblas.so* ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/rocblas/ ./build/bin/
|
||||||
|
sudo mv /opt/rocm/lib/hipblaslt/ ./build/bin/
|
||||||
|
|
||||||
|
- name: Fetch system info
|
||||||
|
id: system-info
|
||||||
|
run: |
|
||||||
|
echo "CPU_ARCH=`uname -m`" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "OS_NAME=`lsb_release -s -i`" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "OS_VERSION=`lsb_release -s -r`" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "OS_TYPE=`uname -s`" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Pack artifacts
|
||||||
|
id: pack_artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
run: |
|
||||||
|
cp ggml/LICENSE ./build/bin/ggml.txt
|
||||||
|
cp LICENSE ./build/bin/stable-diffusion.cpp.txt
|
||||||
|
zip -y -r sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-Ubuntu-${{ env.UBUNTU_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-rocm.zip ./build/bin
|
||||||
|
|
||||||
|
- name: Upload artifacts
|
||||||
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-Ubuntu-${{ env.UBUNTU_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-rocm.zip
|
||||||
|
path: |
|
||||||
|
sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-Ubuntu-${{ env.UBUNTU_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}-rocm.zip
|
||||||
|
|
||||||
release:
|
release:
|
||||||
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
||||||
|
|
||||||
@ -367,6 +705,9 @@ jobs:
|
|||||||
|
|
||||||
needs:
|
needs:
|
||||||
- ubuntu-latest-cmake
|
- ubuntu-latest-cmake
|
||||||
|
- ubuntu-latest-cmake-vulkan
|
||||||
|
- ubuntu-latest-rocm
|
||||||
|
- build-and-push-docker-images
|
||||||
- macOS-latest-cmake
|
- macOS-latest-cmake
|
||||||
- windows-latest-cmake
|
- windows-latest-cmake
|
||||||
- windows-latest-cmake-hip
|
- windows-latest-cmake-hip
|
||||||
@ -392,7 +733,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Get commit hash
|
- name: Get commit hash
|
||||||
id: commit
|
id: commit
|
||||||
uses: pr-mpt/actions-commit-hash@v2
|
uses: prompt/actions-commit-hash@v2
|
||||||
|
|
||||||
- name: Create release
|
- name: Create release
|
||||||
id: create_release
|
id: create_release
|
||||||
|
|||||||
3
.gitmodules
vendored
@ -1,3 +1,6 @@
|
|||||||
[submodule "ggml"]
|
[submodule "ggml"]
|
||||||
path = ggml
|
path = ggml
|
||||||
url = https://github.com/ggml-org/ggml.git
|
url = https://github.com/ggml-org/ggml.git
|
||||||
|
[submodule "examples/server/frontend"]
|
||||||
|
path = examples/server/frontend
|
||||||
|
url = https://github.com/leejet/stable-ui.git
|
||||||
|
|||||||
@ -8,6 +8,11 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
|||||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (MSVC)
|
||||||
|
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
||||||
|
add_compile_definitions(_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|
||||||
@ -31,7 +36,6 @@ option(SD_VULKAN "sd: vulkan backend" OFF)
|
|||||||
option(SD_OPENCL "sd: opencl backend" OFF)
|
option(SD_OPENCL "sd: opencl backend" OFF)
|
||||||
option(SD_SYCL "sd: sycl backend" OFF)
|
option(SD_SYCL "sd: sycl backend" OFF)
|
||||||
option(SD_MUSA "sd: musa backend" OFF)
|
option(SD_MUSA "sd: musa backend" OFF)
|
||||||
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
|
|
||||||
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
|
||||||
option(SD_BUILD_SHARED_GGML_LIB "sd: build ggml as a separate shared lib" OFF)
|
option(SD_BUILD_SHARED_GGML_LIB "sd: build ggml as a separate shared lib" OFF)
|
||||||
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
option(SD_USE_SYSTEM_GGML "sd: use system-installed GGML library" OFF)
|
||||||
@ -65,26 +69,54 @@ if (SD_HIPBLAS)
|
|||||||
message("-- Use HIPBLAS as backend stable-diffusion")
|
message("-- Use HIPBLAS as backend stable-diffusion")
|
||||||
set(GGML_HIP ON)
|
set(GGML_HIP ON)
|
||||||
add_definitions(-DSD_USE_CUDA)
|
add_definitions(-DSD_USE_CUDA)
|
||||||
if(SD_FAST_SOFTMAX)
|
|
||||||
set(GGML_CUDA_FAST_SOFTMAX ON)
|
|
||||||
endif()
|
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if(SD_MUSA)
|
if(SD_MUSA)
|
||||||
message("-- Use MUSA as backend stable-diffusion")
|
message("-- Use MUSA as backend stable-diffusion")
|
||||||
set(GGML_MUSA ON)
|
set(GGML_MUSA ON)
|
||||||
add_definitions(-DSD_USE_CUDA)
|
add_definitions(-DSD_USE_CUDA)
|
||||||
if(SD_FAST_SOFTMAX)
|
|
||||||
set(GGML_CUDA_FAST_SOFTMAX ON)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(SD_LIB stable-diffusion)
|
set(SD_LIB stable-diffusion)
|
||||||
|
|
||||||
file(GLOB SD_LIB_SOURCES
|
file(GLOB SD_LIB_SOURCES
|
||||||
"*.h"
|
"src/*.h"
|
||||||
"*.cpp"
|
"src/*.cpp"
|
||||||
"*.hpp"
|
"src/*.hpp"
|
||||||
|
"src/vocab/*.h"
|
||||||
|
"src/vocab/*.cpp"
|
||||||
|
)
|
||||||
|
|
||||||
|
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
|
||||||
|
if(GIT_EXE)
|
||||||
|
execute_process(COMMAND ${GIT_EXE} describe --tags --abbrev=7 --dirty=+
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
|
OUTPUT_VARIABLE SDCPP_BUILD_VERSION
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
|
ERROR_QUIET
|
||||||
|
)
|
||||||
|
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
|
||||||
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||||
|
OUTPUT_VARIABLE SDCPP_BUILD_COMMIT
|
||||||
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
|
ERROR_QUIET
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT SDCPP_BUILD_VERSION)
|
||||||
|
set(SDCPP_BUILD_VERSION unknown)
|
||||||
|
endif()
|
||||||
|
message(STATUS "stable-diffusion.cpp version ${SDCPP_BUILD_VERSION}")
|
||||||
|
|
||||||
|
if(NOT SDCPP_BUILD_COMMIT)
|
||||||
|
set(SDCPP_BUILD_COMMIT unknown)
|
||||||
|
endif()
|
||||||
|
message(STATUS "stable-diffusion.cpp commit ${SDCPP_BUILD_COMMIT}")
|
||||||
|
|
||||||
|
set_property(
|
||||||
|
SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/version.cpp
|
||||||
|
APPEND PROPERTY COMPILE_DEFINITIONS
|
||||||
|
SDCPP_BUILD_COMMIT=${SDCPP_BUILD_COMMIT} SDCPP_BUILD_VERSION=${SDCPP_BUILD_VERSION}
|
||||||
)
|
)
|
||||||
|
|
||||||
if(SD_BUILD_SHARED_LIBS)
|
if(SD_BUILD_SHARED_LIBS)
|
||||||
@ -145,6 +177,7 @@ endif()
|
|||||||
add_subdirectory(thirdparty)
|
add_subdirectory(thirdparty)
|
||||||
|
|
||||||
target_link_libraries(${SD_LIB} PUBLIC ggml zip)
|
target_link_libraries(${SD_LIB} PUBLIC ggml zip)
|
||||||
|
target_include_directories(${SD_LIB} PUBLIC . include)
|
||||||
target_include_directories(${SD_LIB} PUBLIC . thirdparty)
|
target_include_directories(${SD_LIB} PUBLIC . thirdparty)
|
||||||
target_compile_features(${SD_LIB} PUBLIC c_std_11 cxx_std_17)
|
target_compile_features(${SD_LIB} PUBLIC c_std_11 cxx_std_17)
|
||||||
|
|
||||||
@ -153,7 +186,7 @@ if (SD_BUILD_EXAMPLES)
|
|||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(SD_PUBLIC_HEADERS stable-diffusion.h)
|
set(SD_PUBLIC_HEADERS include/stable-diffusion.h)
|
||||||
set_target_properties(${SD_LIB} PROPERTIES PUBLIC_HEADER "${SD_PUBLIC_HEADERS}")
|
set_target_properties(${SD_LIB} PROPERTIES PUBLIC_HEADER "${SD_PUBLIC_HEADERS}")
|
||||||
|
|
||||||
install(TARGETS ${SD_LIB} LIBRARY PUBLIC_HEADER)
|
install(TARGETS ${SD_LIB} LIBRARY PUBLIC_HEADER)
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
ARG UBUNTU_VERSION=22.04
|
ARG UBUNTU_VERSION=24.04
|
||||||
|
|
||||||
FROM ubuntu:$UBUNTU_VERSION AS build
|
FROM ubuntu:$UBUNTU_VERSION AS build
|
||||||
|
|
||||||
@ -17,6 +17,7 @@ RUN apt-get update && \
|
|||||||
apt-get install --yes --no-install-recommends libgomp1 && \
|
apt-get install --yes --no-install-recommends libgomp1 && \
|
||||||
apt-get clean
|
apt-get clean
|
||||||
|
|
||||||
COPY --from=build /sd.cpp/build/bin/sd /sd
|
COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
|
||||||
|
|
||||||
ENTRYPOINT [ "/sd" ]
|
ENTRYPOINT [ "/sd-cli" ]
|
||||||
25
Dockerfile.cuda
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
ARG CUDA_VERSION=12.6.3
|
||||||
|
ARG UBUNTU_VERSION=24.04
|
||||||
|
|
||||||
|
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} AS build
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends build-essential git ccache cmake
|
||||||
|
|
||||||
|
WORKDIR /sd.cpp
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
ARG CUDACXX=/usr/local/cuda/bin/nvcc
|
||||||
|
RUN cmake . -B ./build -DSD_CUDA=ON
|
||||||
|
RUN cmake --build ./build --config Release -j$(nproc)
|
||||||
|
|
||||||
|
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-runtime-ubuntu${UBUNTU_VERSION} AS runtime
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install --yes --no-install-recommends libgomp1 && \
|
||||||
|
apt-get clean
|
||||||
|
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/sd-cli" ]
|
||||||
@ -18,6 +18,7 @@ RUN mkdir build && cd build && \
|
|||||||
|
|
||||||
FROM mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64 as runtime
|
FROM mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64 as runtime
|
||||||
|
|
||||||
COPY --from=build /sd.cpp/build/bin/sd /sd
|
COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
|
||||||
|
|
||||||
ENTRYPOINT [ "/sd" ]
|
ENTRYPOINT [ "/sd-cli" ]
|
||||||
@ -14,6 +14,7 @@ RUN mkdir build && cd build && \
|
|||||||
|
|
||||||
FROM intel/oneapi-basekit:${SYCL_VERSION}-devel-ubuntu24.04 AS runtime
|
FROM intel/oneapi-basekit:${SYCL_VERSION}-devel-ubuntu24.04 AS runtime
|
||||||
|
|
||||||
COPY --from=build /sd.cpp/build/bin/sd /sd
|
COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
|
||||||
|
|
||||||
ENTRYPOINT [ "/sd" ]
|
ENTRYPOINT [ "/sd-cli" ]
|
||||||
|
|||||||
23
Dockerfile.vulkan
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
ARG UBUNTU_VERSION=24.04
|
||||||
|
|
||||||
|
FROM ubuntu:$UBUNTU_VERSION AS build
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends build-essential git cmake libvulkan-dev glslc
|
||||||
|
|
||||||
|
WORKDIR /sd.cpp
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
RUN cmake . -B ./build -DSD_VULKAN=ON
|
||||||
|
RUN cmake --build ./build --config Release --parallel
|
||||||
|
|
||||||
|
FROM ubuntu:$UBUNTU_VERSION AS runtime
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install --yes --no-install-recommends libgomp1 libvulkan1 mesa-vulkan-drivers && \
|
||||||
|
apt-get clean
|
||||||
|
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-cli /sd-cli
|
||||||
|
COPY --from=build /sd.cpp/build/bin/sd-server /sd-server
|
||||||
|
|
||||||
|
ENTRYPOINT [ "/sd-cli" ]
|
||||||
34
README.md
@ -1,5 +1,5 @@
|
|||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="./assets/cat_with_sd_cpp_42.png" width="360x">
|
<img src="./assets/logo.png" width="360x">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
# stable-diffusion.cpp
|
# stable-diffusion.cpp
|
||||||
@ -15,6 +15,15 @@ API and command-line option may change frequently.***
|
|||||||
|
|
||||||
## 🔥Important News
|
## 🔥Important News
|
||||||
|
|
||||||
|
* **2026/01/18** 🚀 stable-diffusion.cpp now supports **FLUX.2-klein**
|
||||||
|
👉 Details: [PR #1193](https://github.com/leejet/stable-diffusion.cpp/pull/1193)
|
||||||
|
|
||||||
|
* **2025/12/01** 🚀 stable-diffusion.cpp now supports **Z-Image**
|
||||||
|
👉 Details: [PR #1020](https://github.com/leejet/stable-diffusion.cpp/pull/1020)
|
||||||
|
|
||||||
|
* **2025/11/30** 🚀 stable-diffusion.cpp now supports **FLUX.2-dev**
|
||||||
|
👉 Details: [PR #1016](https://github.com/leejet/stable-diffusion.cpp/pull/1016)
|
||||||
|
|
||||||
* **2025/10/13** 🚀 stable-diffusion.cpp now supports **Qwen-Image-Edit / Qwen-Image-Edit 2509**
|
* **2025/10/13** 🚀 stable-diffusion.cpp now supports **Qwen-Image-Edit / Qwen-Image-Edit 2509**
|
||||||
👉 Details: [PR #877](https://github.com/leejet/stable-diffusion.cpp/pull/877)
|
👉 Details: [PR #877](https://github.com/leejet/stable-diffusion.cpp/pull/877)
|
||||||
|
|
||||||
@ -37,13 +46,17 @@ API and command-line option may change frequently.***
|
|||||||
- SDXL, [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo)
|
- SDXL, [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo)
|
||||||
- [Some SD1.x and SDXL distilled models](./docs/distilled_sd.md)
|
- [Some SD1.x and SDXL distilled models](./docs/distilled_sd.md)
|
||||||
- [SD3/SD3.5](./docs/sd3.md)
|
- [SD3/SD3.5](./docs/sd3.md)
|
||||||
- [Flux-dev/Flux-schnell](./docs/flux.md)
|
- [FLUX.1-dev/FLUX.1-schnell](./docs/flux.md)
|
||||||
|
- [FLUX.2-dev/FLUX.2-klein](./docs/flux2.md)
|
||||||
- [Chroma](./docs/chroma.md)
|
- [Chroma](./docs/chroma.md)
|
||||||
- [Chroma1-Radiance](./docs/chroma_radiance.md)
|
- [Chroma1-Radiance](./docs/chroma_radiance.md)
|
||||||
- [Qwen Image](./docs/qwen_image.md)
|
- [Qwen Image](./docs/qwen_image.md)
|
||||||
|
- [Z-Image](./docs/z_image.md)
|
||||||
|
- [Ovis-Image](./docs/ovis_image.md)
|
||||||
|
- [Anima](./docs/anima.md)
|
||||||
- Image Edit Models
|
- Image Edit Models
|
||||||
- [FLUX.1-Kontext-dev](./docs/kontext.md)
|
- [FLUX.1-Kontext-dev](./docs/kontext.md)
|
||||||
- [Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
|
- [Qwen Image Edit series](./docs/qwen_image_edit.md)
|
||||||
- Video Models
|
- Video Models
|
||||||
- [Wan2.1/Wan2.2](./docs/wan.md)
|
- [Wan2.1/Wan2.2](./docs/wan.md)
|
||||||
- [PhotoMaker](https://github.com/TencentARC/PhotoMaker) support.
|
- [PhotoMaker](https://github.com/TencentARC/PhotoMaker) support.
|
||||||
@ -61,7 +74,7 @@ API and command-line option may change frequently.***
|
|||||||
- SYCL
|
- SYCL
|
||||||
- Supported weight formats
|
- Supported weight formats
|
||||||
- Pytorch checkpoint (`.ckpt` or `.pth`)
|
- Pytorch checkpoint (`.ckpt` or `.pth`)
|
||||||
- Safetensors (`./safetensors`)
|
- Safetensors (`.safetensors`)
|
||||||
- GGUF (`.gguf`)
|
- GGUF (`.gguf`)
|
||||||
- Supported platforms
|
- Supported platforms
|
||||||
- Linux
|
- Linux
|
||||||
@ -96,7 +109,7 @@ API and command-line option may change frequently.***
|
|||||||
### Download model weights
|
### Download model weights
|
||||||
|
|
||||||
- download weights(.ckpt or .safetensors or .gguf). For example
|
- download weights(.ckpt or .safetensors or .gguf). For example
|
||||||
- Stable Diffusion v1.5 from https://huggingface.co/runwayml/stable-diffusion-v1-5
|
- Stable Diffusion v1.5 from https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
curl -L -O https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
|
curl -L -O https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
|
||||||
@ -105,7 +118,7 @@ API and command-line option may change frequently.***
|
|||||||
### Generate an image with just one command
|
### Generate an image with just one command
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat"
|
./bin/sd-cli -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat"
|
||||||
```
|
```
|
||||||
|
|
||||||
***For detailed command-line arguments, check out [cli doc](./examples/cli/README.md).***
|
***For detailed command-line arguments, check out [cli doc](./examples/cli/README.md).***
|
||||||
@ -118,12 +131,16 @@ If you want to improve performance or reduce VRAM/RAM usage, please refer to [pe
|
|||||||
|
|
||||||
- [SD1.x/SD2.x/SDXL](./docs/sd.md)
|
- [SD1.x/SD2.x/SDXL](./docs/sd.md)
|
||||||
- [SD3/SD3.5](./docs/sd3.md)
|
- [SD3/SD3.5](./docs/sd3.md)
|
||||||
- [Flux-dev/Flux-schnell](./docs/flux.md)
|
- [FLUX.1-dev/FLUX.1-schnell](./docs/flux.md)
|
||||||
|
- [FLUX.2-dev/FLUX.2-klein](./docs/flux2.md)
|
||||||
- [FLUX.1-Kontext-dev](./docs/kontext.md)
|
- [FLUX.1-Kontext-dev](./docs/kontext.md)
|
||||||
- [Chroma](./docs/chroma.md)
|
- [Chroma](./docs/chroma.md)
|
||||||
- [🔥Qwen Image](./docs/qwen_image.md)
|
- [🔥Qwen Image](./docs/qwen_image.md)
|
||||||
- [🔥Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
|
- [🔥Qwen Image Edit series](./docs/qwen_image_edit.md)
|
||||||
- [🔥Wan2.1/Wan2.2](./docs/wan.md)
|
- [🔥Wan2.1/Wan2.2](./docs/wan.md)
|
||||||
|
- [🔥Z-Image](./docs/z_image.md)
|
||||||
|
- [Ovis-Image](./docs/ovis_image.md)
|
||||||
|
- [Anima](./docs/anima.md)
|
||||||
- [LoRA](./docs/lora.md)
|
- [LoRA](./docs/lora.md)
|
||||||
- [LCM/LCM-LoRA](./docs/lcm.md)
|
- [LCM/LCM-LoRA](./docs/lcm.md)
|
||||||
- [Using PhotoMaker to personalize image generation](./docs/photo_maker.md)
|
- [Using PhotoMaker to personalize image generation](./docs/photo_maker.md)
|
||||||
@ -131,6 +148,7 @@ If you want to improve performance or reduce VRAM/RAM usage, please refer to [pe
|
|||||||
- [Using TAESD to faster decoding](./docs/taesd.md)
|
- [Using TAESD to faster decoding](./docs/taesd.md)
|
||||||
- [Docker](./docs/docker.md)
|
- [Docker](./docs/docker.md)
|
||||||
- [Quantization and GGUF](./docs/quantization_and_gguf.md)
|
- [Quantization and GGUF](./docs/quantization_and_gguf.md)
|
||||||
|
- [Inference acceleration via caching](./docs/caching.md)
|
||||||
|
|
||||||
## Bindings
|
## Bindings
|
||||||
|
|
||||||
|
|||||||
BIN
assets/anima/example.png
Normal file
|
After Width: | Height: | Size: 230 KiB |
BIN
assets/flux2/example.png
Normal file
|
After Width: | Height: | Size: 556 KiB |
BIN
assets/flux2/flux2-klein-4b-edit.png
Normal file
|
After Width: | Height: | Size: 510 KiB |
BIN
assets/flux2/flux2-klein-4b.png
Normal file
|
After Width: | Height: | Size: 455 KiB |
BIN
assets/flux2/flux2-klein-9b-edit.png
Normal file
|
After Width: | Height: | Size: 511 KiB |
BIN
assets/flux2/flux2-klein-9b.png
Normal file
|
After Width: | Height: | Size: 491 KiB |
BIN
assets/flux2/flux2-klein-base-4b.png
Normal file
|
After Width: | Height: | Size: 464 KiB |
BIN
assets/flux2/flux2-klein-base-9b.png
Normal file
|
After Width: | Height: | Size: 552 KiB |
BIN
assets/logo.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
assets/ovis_image/example.png
Normal file
|
After Width: | Height: | Size: 401 KiB |
BIN
assets/qwen/qwen_image_edit_2511.png
Normal file
|
After Width: | Height: | Size: 450 KiB |
BIN
assets/z_image/base_bf16.png
Normal file
|
After Width: | Height: | Size: 870 KiB |
BIN
assets/z_image/bf16.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
assets/z_image/q2_K.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
BIN
assets/z_image/q3_K.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
BIN
assets/z_image/q4_0.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
assets/z_image/q4_K.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
assets/z_image/q5_0.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
assets/z_image/q6_K.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
BIN
assets/z_image/q8_0.png
Normal file
|
After Width: | Height: | Size: 1.0 MiB |
21
docs/anima.md
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# How to Use
|
||||||
|
|
||||||
|
## Download weights
|
||||||
|
|
||||||
|
- Download Anima
|
||||||
|
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/diffusion_models
|
||||||
|
- gguf: https://huggingface.co/Bedovyy/Anima-GGUF/tree/main
|
||||||
|
- gguf Anima2: https://huggingface.co/JusteLeo/Anima2-GGUF/tree/main
|
||||||
|
- Download vae
|
||||||
|
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/vae
|
||||||
|
- Download Qwen3-0.6B-Base
|
||||||
|
- safetensors: https://huggingface.co/circlestone-labs/Anima/tree/main/split_files/text_encoders
|
||||||
|
- gguf: https://huggingface.co/mradermacher/Qwen3-0.6B-Base-GGUF/tree/main
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```sh
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\anima-preview.safetensors --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_06b_base.safetensors -p "a lovely cat holding a sign says 'anima.cpp'" --cfg-scale 6.0 --sampling-method euler -v --offload-to-cpu --diffusion-fa
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="anima image example" src="../assets/anima/example.png" />
|
||||||
141
docs/caching.md
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
## Caching
|
||||||
|
|
||||||
|
Caching methods accelerate diffusion inference by reusing intermediate computations when changes between steps are small.
|
||||||
|
|
||||||
|
### Cache Modes
|
||||||
|
|
||||||
|
| Mode | Target | Description |
|
||||||
|
|------|--------|-------------|
|
||||||
|
| `ucache` | UNET models | Condition-level caching with error tracking |
|
||||||
|
| `easycache` | DiT models | Condition-level cache |
|
||||||
|
| `dbcache` | DiT models | Block-level L1 residual threshold |
|
||||||
|
| `taylorseer` | DiT models | Taylor series approximation |
|
||||||
|
| `cache-dit` | DiT models | Combined DBCache + TaylorSeer |
|
||||||
|
| `spectrum` | UNET models | Chebyshev + Taylor output forecasting |
|
||||||
|
|
||||||
|
### UCache (UNET Models)
|
||||||
|
|
||||||
|
UCache caches the residual difference (output - input) and reuses it when input changes are below threshold.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sd-cli -m model.safetensors -p "a cat" --cache-mode ucache --cache-option "threshold=1.5"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|-----------|-------------|---------|
|
||||||
|
| `threshold` | Error threshold for reuse decision | 1.0 |
|
||||||
|
| `start` | Start caching at this percent of steps | 0.15 |
|
||||||
|
| `end` | Stop caching at this percent of steps | 0.95 |
|
||||||
|
| `decay` | Error decay rate (0-1) | 1.0 |
|
||||||
|
| `relative` | Scale threshold by output norm (0/1) | 1 |
|
||||||
|
| `reset` | Reset error after computing (0/1) | 1 |
|
||||||
|
|
||||||
|
#### Reset Parameter
|
||||||
|
|
||||||
|
The `reset` parameter controls error accumulation behavior:
|
||||||
|
|
||||||
|
- `reset=1` (default): Resets accumulated error after each computed step. More aggressive caching, works well with most samplers.
|
||||||
|
- `reset=0`: Keeps error accumulated. More conservative, recommended for `euler_a` sampler.
|
||||||
|
|
||||||
|
### EasyCache (DiT Models)
|
||||||
|
|
||||||
|
Condition-level caching for DiT models. Caches and reuses outputs when input changes are below threshold.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--cache-mode easycache --cache-option "threshold=0.3"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|-----------|-------------|---------|
|
||||||
|
| `threshold` | Input change threshold for reuse | 0.2 |
|
||||||
|
| `start` | Start caching at this percent of steps | 0.15 |
|
||||||
|
| `end` | Stop caching at this percent of steps | 0.95 |
|
||||||
|
|
||||||
|
### Cache-DIT (DiT Models)
|
||||||
|
|
||||||
|
For DiT models like FLUX and QWEN, use block-level caching modes.
|
||||||
|
|
||||||
|
#### DBCache
|
||||||
|
|
||||||
|
Caches blocks based on L1 residual difference threshold:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--cache-mode dbcache --cache-option "threshold=0.25,warmup=4"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### TaylorSeer
|
||||||
|
|
||||||
|
Uses Taylor series approximation to predict block outputs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--cache-mode taylorseer
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Cache-DIT (Combined)
|
||||||
|
|
||||||
|
Combines DBCache and TaylorSeer:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--cache-mode cache-dit
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|-----------|-------------|---------|
|
||||||
|
| `Fn` | Front blocks to always compute | 8 |
|
||||||
|
| `Bn` | Back blocks to always compute | 0 |
|
||||||
|
| `threshold` | L1 residual difference threshold | 0.08 |
|
||||||
|
| `warmup` | Steps before caching starts | 8 |
|
||||||
|
|
||||||
|
#### SCM Options
|
||||||
|
|
||||||
|
Steps Computation Mask controls which steps can be cached:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--scm-mask "1,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1"
|
||||||
|
```
|
||||||
|
|
||||||
|
Mask values: `1` = compute, `0` = can cache.
|
||||||
|
|
||||||
|
| Policy | Description |
|
||||||
|
|--------|-------------|
|
||||||
|
| `dynamic` | Check threshold before caching |
|
||||||
|
| `static` | Always cache on cacheable steps |
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--scm-policy dynamic
|
||||||
|
```
|
||||||
|
|
||||||
|
### Spectrum (UNET Models)
|
||||||
|
|
||||||
|
Spectrum uses Chebyshev polynomial fitting blended with Taylor extrapolation to predict denoised outputs, skipping entire UNet forward passes. Based on the paper [Spectrum: Adaptive Spectral Feature Forecasting for Efficient Diffusion Sampling](https://github.com/tingyu215/Spectrum).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sd-cli -m model.safetensors -p "a cat" --cache-mode spectrum
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Parameters
|
||||||
|
|
||||||
|
| Parameter | Description | Default |
|
||||||
|
|-----------|-------------|---------|
|
||||||
|
| `w` | Chebyshev vs Taylor blend weight (0=Taylor, 1=Chebyshev) | 0.40 |
|
||||||
|
| `m` | Chebyshev polynomial degree | 3 |
|
||||||
|
| `lam` | Ridge regression regularization | 1.0 |
|
||||||
|
| `window` | Initial window size (compute every N steps) | 2 |
|
||||||
|
| `flex` | Window growth per computed step after warmup | 0.50 |
|
||||||
|
| `warmup` | Steps to always compute before caching starts | 4 |
|
||||||
|
| `stop` | Stop caching at this fraction of total steps | 0.9 |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Performance Tips
|
||||||
|
|
||||||
|
- Start with default thresholds and adjust based on output quality
|
||||||
|
- Lower threshold = better quality, less speedup
|
||||||
|
- Higher threshold = more speedup, potential quality loss
|
||||||
|
- More steps generally means more caching opportunities
|
||||||
@ -15,7 +15,7 @@ You can run Chroma using stable-diffusion.cpp with a GPU that has 6GB or even 4G
|
|||||||
You can download the preconverted gguf weights from [silveroxides/Chroma-GGUF](https://huggingface.co/silveroxides/Chroma-GGUF), this way you don't have to do the conversion yourself.
|
You can download the preconverted gguf weights from [silveroxides/Chroma-GGUF](https://huggingface.co/silveroxides/Chroma-GGUF), this way you don't have to do the conversion yourself.
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M convert -m ..\..\ComfyUI\models\unet\chroma-unlocked-v40.safetensors -o ..\models\chroma-unlocked-v40-q8_0.gguf -v --type q8_0
|
.\bin\Release\sd-cli.exe -M convert -m ..\..\ComfyUI\models\unet\chroma-unlocked-v40.safetensors -o ..\models\chroma-unlocked-v40-q8_0.gguf -v --type q8_0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
@ -24,7 +24,7 @@ You can download the preconverted gguf weights from [silveroxides/Chroma-GGUF](h
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\models\chroma-unlocked-v40-q8_0.gguf --vae ..\models\ae.sft --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'" --cfg-scale 4.0 --sampling-method euler -v --chroma-disable-dit-mask --clip-on-cpu
|
.\bin\Release\sd-cli.exe --diffusion-model ..\models\chroma-unlocked-v40-q8_0.gguf --vae ..\models\ae.sft --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'" --cfg-scale 4.0 --sampling-method euler -v --chroma-disable-dit-mask --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||

|

|
||||||
|
|||||||
@ -12,7 +12,7 @@
|
|||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Chroma1-Radiance-v0.4-Q8_0.gguf --t5xxl ..\..\ComfyUI\models\clip\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma radiance cpp'" --cfg-scale 4.0 --sampling-method euler -v
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Chroma1-Radiance-v0.4-Q8_0.gguf --t5xxl ..\..\ComfyUI\models\clip\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma radiance cpp'" --cfg-scale 4.0 --sampling-method euler -v
|
||||||
```
|
```
|
||||||
|
|
||||||
<img alt="Chroma1-Radiance" src="../assets/flux/chroma1-radiance.png" />
|
<img alt="Chroma1-Radiance" src="../assets/flux/chroma1-radiance.png" />
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
# Running distilled models: SSD1B and SDx.x with tiny U-Nets
|
# Running distilled models: SSD1B, Vega and SDx.x with tiny U-Nets
|
||||||
|
|
||||||
## Preface
|
## Preface
|
||||||
|
|
||||||
These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B U-Net contains only one middle block and fewer attention layers in its up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.
|
These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B and Vega U-Nets contain only one middle block and fewer attention layers in their up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.
|
||||||
Similarly, SD1.x- and SD2.x-style models with a tiny U-Net consist of only 6 U-Net blocks, leading to very small files and time savings of up to 50%. For more information, see the paper: https://arxiv.org/pdf/2305.15798.pdf.
|
Similarly, SD1.x- and SD2.x-style models with a tiny U-Net consist of only 6 U-Net blocks, leading to very small files and time savings of up to 50%. For more information, see the paper: https://arxiv.org/pdf/2305.15798.pdf.
|
||||||
|
|
||||||
## SSD1B
|
## SSD1B
|
||||||
@ -17,7 +17,17 @@ Useful LoRAs are also available:
|
|||||||
* https://huggingface.co/seungminh/lora-swarovski-SSD-1B/resolve/main/pytorch_lora_weights.safetensors
|
* https://huggingface.co/seungminh/lora-swarovski-SSD-1B/resolve/main/pytorch_lora_weights.safetensors
|
||||||
* https://huggingface.co/kylielee505/mylcmlorassd/resolve/main/pytorch_lora_weights.safetensors
|
* https://huggingface.co/kylielee505/mylcmlorassd/resolve/main/pytorch_lora_weights.safetensors
|
||||||
|
|
||||||
These files can be used out-of-the-box, unlike the models described in the next section.
|
## Vega
|
||||||
|
|
||||||
|
Segmind's Vega model is available online here:
|
||||||
|
|
||||||
|
* https://huggingface.co/segmind/Segmind-Vega/resolve/main/segmind-vega.safetensors
|
||||||
|
|
||||||
|
VegaRT is an example of an LCM-LoRA:
|
||||||
|
|
||||||
|
* https://huggingface.co/segmind/Segmind-VegaRT/resolve/main/pytorch_lora_weights.safetensors
|
||||||
|
|
||||||
|
Both files can be used out-of-the-box, unlike the models described in the next sections.
|
||||||
|
|
||||||
|
|
||||||
## SD1.x, SD2.x with tiny U-Nets
|
## SD1.x, SD2.x with tiny U-Nets
|
||||||
@ -83,7 +93,7 @@ python convert_diffusers_to_original_stable_diffusion.py \
|
|||||||
The file segmind_tiny-sd.ckpt will be generated and is now ready for use with sd.cpp. You can follow a similar process for the other models mentioned above.
|
The file segmind_tiny-sd.ckpt will be generated and is now ready for use with sd.cpp. You can follow a similar process for the other models mentioned above.
|
||||||
|
|
||||||
|
|
||||||
### Another available .ckpt file:
|
##### Another available .ckpt file:
|
||||||
|
|
||||||
* https://huggingface.co/ClashSAN/small-sd/resolve/main/tinySDdistilled.ckpt
|
* https://huggingface.co/ClashSAN/small-sd/resolve/main/tinySDdistilled.ckpt
|
||||||
|
|
||||||
@ -97,3 +107,31 @@ for key, value in ckpt['state_dict'].items():
|
|||||||
ckpt['state_dict'][key] = value.contiguous()
|
ckpt['state_dict'][key] = value.contiguous()
|
||||||
torch.save(ckpt, "tinySDdistilled_fixed.ckpt")
|
torch.save(ckpt, "tinySDdistilled_fixed.ckpt")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### SDXS-512
|
||||||
|
|
||||||
|
Another very tiny and **incredibly fast** model is SDXS by IDKiro et al. The authors refer to it as *"Real-Time One-Step Latent Diffusion Models with Image Conditions"*. For details, read the paper: https://arxiv.org/pdf/2403.16627 . Once again, the authors removed further blocks from the U-Net, and unlike other SD1 models they use an adjusted _AutoEncoderTiny_ instead of the default _AutoEncoderKL_ for the VAE.
|
||||||
|
|
||||||
|
##### 1. Download the diffusers model from Hugging Face using Python:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from diffusers import StableDiffusionPipeline
|
||||||
|
pipe = StableDiffusionPipeline.from_pretrained("IDKiro/sdxs-512-dreamshaper")
|
||||||
|
pipe.save_pretrained(save_directory="sdxs")
|
||||||
|
```
|
||||||
|
##### 2. Create a safetensors file
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python convert_diffusers_to_original_stable_diffusion.py \
|
||||||
|
--model_path sdxs --checkpoint_path sdxs.safetensors --half --use_safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
##### 3. Run the model as follows:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
~/stable-diffusion.cpp/build/bin/sd-cli -m sdxs.safetensors -p "portrait of a lovely cat" \
|
||||||
|
--cfg-scale 1 --steps 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Both options, `--cfg-scale 1` and `--steps 1`, are mandatory here.
|
||||||
|
|||||||
@ -1,15 +1,39 @@
|
|||||||
## Docker
|
# Docker
|
||||||
|
|
||||||
### Building using Docker
|
## Run CLI
|
||||||
|
|
||||||
|
```shell
|
||||||
|
docker run --rm -v /path/to/models:/models -v /path/to/output/:/output ghcr.io/leejet/stable-diffusion.cpp:master [args...]
|
||||||
|
# For example
|
||||||
|
# docker run --rm -v ./models:/models -v ./build:/output ghcr.io/leejet/stable-diffusion.cpp:master -m /models/sd-v1-4.ckpt -p "a lovely cat" -v -o /output/output.png
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run server
|
||||||
|
|
||||||
|
```shell
|
||||||
|
docker run --rm --init -v /path/to/models:/models -v /path/to/output/:/output -p "1234:1234" --entrypoint "/sd-server" ghcr.io/leejet/stable-diffusion.cpp:master [args...]
|
||||||
|
# For example
|
||||||
|
# docker run --rm --init -v ./models:/models -v ./build:/output -p "1234:1234" --entrypoint "/sd-server" ghcr.io/leejet/stable-diffusion.cpp:master -m /models/sd-v1-4.ckpt -p "a lovely cat" -v -o /output/output.png
|
||||||
|
```
|
||||||
|
|
||||||
|
## Building using Docker
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
docker build -t sd .
|
docker build -t sd .
|
||||||
```
|
```
|
||||||
|
|
||||||
### Run
|
## Building variants using Docker
|
||||||
|
|
||||||
|
Vulkan:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
docker run -v /path/to/models:/models -v /path/to/output/:/output sd [args...]
|
docker build -f Dockerfile.vulkan -t sd .
|
||||||
# For example
|
```
|
||||||
# docker run -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4.ckpt -p "a lovely cat" -v -o /output/output.png
|
|
||||||
|
## Run locally built image's CLI
|
||||||
|
|
||||||
|
```shell
|
||||||
|
docker run --rm -v /path/to/models:/models -v /path/to/output/:/output sd [args...]
|
||||||
|
# For example
|
||||||
|
# docker run --rm -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4.ckpt -p "a lovely cat" -v -o /output/output.png
|
||||||
```
|
```
|
||||||
@ -1,9 +1,9 @@
|
|||||||
## Using ESRGAN to upscale results
|
## Using ESRGAN to upscale results
|
||||||
|
|
||||||
You can use ESRGAN to upscale the generated images. At the moment, only the [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth) model is supported. Support for more models of this architecture will be added soon.
|
You can use ESRGAN—such as the model [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth)—to upscale the generated images and improve their overall resolution and clarity.
|
||||||
|
|
||||||
- Specify the model path using the `--upscale-model PATH` parameter. example:
|
- Specify the model path using the `--upscale-model PATH` parameter. example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --upscale-model ../models/RealESRGAN_x4plus_anime_6B.pth
|
sd-cli -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --upscale-model ../models/RealESRGAN_x4plus_anime_6B.pth
|
||||||
```
|
```
|
||||||
|
|||||||
10
docs/flux.md
@ -15,9 +15,9 @@ You can run Flux using stable-diffusion.cpp with a GPU that has 6GB or even 4GB
|
|||||||
|
|
||||||
You can download the preconverted gguf weights from [FLUX.1-dev-gguf](https://huggingface.co/leejet/FLUX.1-dev-gguf) or [FLUX.1-schnell](https://huggingface.co/leejet/FLUX.1-schnell-gguf), this way you don't have to do the conversion yourself.
|
You can download the preconverted gguf weights from [FLUX.1-dev-gguf](https://huggingface.co/leejet/FLUX.1-dev-gguf) or [FLUX.1-schnell](https://huggingface.co/leejet/FLUX.1-schnell-gguf), this way you don't have to do the conversion yourself.
|
||||||
|
|
||||||
Using fp16 will lead to overflow, but ggml's support for bf16 is not yet fully developed. Therefore, we need to convert flux to gguf format here, which also saves VRAM. For example:
|
For example:
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M convert -m ..\..\ComfyUI\models\unet\flux1-dev.sft -o ..\models\flux1-dev-q8_0.gguf -v --type q8_0
|
.\bin\Release\sd-cli.exe -M convert -m ..\..\ComfyUI\models\unet\flux1-dev.sft -o ..\models\flux1-dev-q8_0.gguf -v --type q8_0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
@ -28,7 +28,7 @@ Using fp16 will lead to overflow, but ggml's support for bf16 is not yet fully d
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\models\flux1-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v --clip-on-cpu
|
.\bin\Release\sd-cli.exe --diffusion-model ..\models\flux1-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
Using formats of different precisions will yield results of varying quality.
|
Using formats of different precisions will yield results of varying quality.
|
||||||
@ -44,7 +44,7 @@ Using formats of different precisions will yield results of varying quality.
|
|||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\models\flux1-schnell-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v --steps 4 --clip-on-cpu
|
.\bin\Release\sd-cli.exe --diffusion-model ..\models\flux1-schnell-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v --steps 4 --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
| q8_0 |
|
| q8_0 |
|
||||||
@ -60,7 +60,7 @@ Since many flux LoRA training libraries have used various LoRA naming formats, i
|
|||||||
- LoRA model from https://huggingface.co/XLabs-AI/flux-lora-collection/tree/main (using comfy converted version!!!)
|
- LoRA model from https://huggingface.co/XLabs-AI/flux-lora-collection/tree/main (using comfy converted version!!!)
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\models\flux1-dev-q8_0.gguf --vae ...\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'<lora:realism_lora_comfy_converted:1>" --cfg-scale 1.0 --sampling-method euler -v --lora-model-dir ../models --clip-on-cpu
|
.\bin\Release\sd-cli.exe --diffusion-model ..\models\flux1-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'<lora:realism_lora_comfy_converted:1>" --cfg-scale 1.0 --sampling-method euler -v --lora-model-dir ../models --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||

|

|
||||||
|
|||||||
92
docs/flux2.md
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
# How to Use
|
||||||
|
|
||||||
|
## Flux.2-dev
|
||||||
|
|
||||||
|
### Download weights
|
||||||
|
|
||||||
|
- Download FLUX.2-dev
|
||||||
|
- gguf: https://huggingface.co/city96/FLUX.2-dev-gguf/tree/main
|
||||||
|
- Download vae
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-dev/tree/main
|
||||||
|
- Download Mistral-Small-3.2-24B-Instruct-2506-GGUF
|
||||||
|
- gguf: https://huggingface.co/unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF/tree/main
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux2-dev-Q4_K_S.gguf --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf -r .\kontext_input.png -p "change 'flux.cpp' to 'flux2-dev.cpp'" --cfg-scale 1.0 --sampling-method euler -v --diffusion-fa --offload-to-cpu
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2 example" src="../assets/flux2/example.png" />
|
||||||
|
|
||||||
|
## Flux.2 klein 4B / Flux.2 klein base 4B
|
||||||
|
|
||||||
|
### Download weights
|
||||||
|
|
||||||
|
- Download FLUX.2-klein-4B
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-klein-4B
|
||||||
|
- gguf: https://huggingface.co/leejet/FLUX.2-klein-4B-GGUF/tree/main
|
||||||
|
- Download FLUX.2-klein-base-4B
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-klein-base-4B
|
||||||
|
- gguf: https://huggingface.co/leejet/FLUX.2-klein-base-4B-GGUF/tree/main
|
||||||
|
- Download vae
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-dev/tree/main
|
||||||
|
- Download Qwen3 4B
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/flux2-klein-4B/tree/main/split_files/text_encoders
|
||||||
|
- gguf: https://huggingface.co/unsloth/Qwen3-4B-GGUF/tree/main
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux-2-klein-4b.safetensors --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_4b.safetensors -p "a lovely cat" --cfg-scale 1.0 --steps 4 -v --offload-to-cpu --diffusion-fa
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2-klein-4b" src="../assets/flux2/flux2-klein-4b.png" />
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux-2-klein-4b.safetensors --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_4b.safetensors -r .\kontext_input.png -p "change 'flux.cpp' to 'klein.cpp'" --cfg-scale 1.0 --sampling-method euler -v --diffusion-fa --offload-to-cpu --steps 4
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2-klein-4b-edit" src="../assets/flux2/flux2-klein-4b-edit.png" />
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux-2-klein-base-4b.safetensors --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_4b.safetensors -p "a lovely cat" --cfg-scale 4.0 --steps 20 -v --offload-to-cpu --diffusion-fa
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2-klein-base-4b" src="../assets/flux2/flux2-klein-base-4b.png" />
|
||||||
|
|
||||||
|
## Flux.2 klein 9B / Flux.2 klein base 9B
|
||||||
|
|
||||||
|
### Download weights
|
||||||
|
|
||||||
|
- Download FLUX.2-klein-9B
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-klein-9B
|
||||||
|
- gguf: https://huggingface.co/leejet/FLUX.2-klein-9B-GGUF/tree/main
|
||||||
|
- Download FLUX.2-klein-base-9B
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-klein-base-9B
|
||||||
|
- gguf: https://huggingface.co/leejet/FLUX.2-klein-base-9B-GGUF/tree/main
|
||||||
|
- Download vae
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.2-dev/tree/main
|
||||||
|
- Download Qwen3 8B
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/flux2-klein-9B/tree/main/split_files/text_encoders
|
||||||
|
- gguf: https://huggingface.co/unsloth/Qwen3-8B-GGUF/tree/main
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux-2-klein-9b.safetensors --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_8b.safetensors -p "a lovely cat" --cfg-scale 1.0 --steps 4 -v --offload-to-cpu --diffusion-fa
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2-klein-9b" src="../assets/flux2/flux2-klein-9b.png" />
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux-2-klein-9b.safetensors --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_8b.safetensors -r .\kontext_input.png -p "change 'flux.cpp' to 'klein.cpp'" --cfg-scale 1.0 --sampling-method euler -v --diffusion-fa --offload-to-cpu --steps 4
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2-klein-9b-edit" src="../assets/flux2/flux2-klein-9b-edit.png" />
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\flux-2-klein-base-9b.safetensors --vae ..\..\ComfyUI\models\vae\flux2_ae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_3_8b.safetensors -p "a lovely cat" --cfg-scale 4.0 --steps 20 -v --offload-to-cpu --diffusion-fa
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="flux2-klein-base-9b" src="../assets/flux2/flux2-klein-base-9b.png" />
|
||||||
@ -82,4 +82,4 @@ cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_H
|
|||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
|
|
||||||
If everything went OK, `build\bin\sd.exe` file should appear.
|
If everything went OK, the `build\bin\sd-cli.exe` file should appear.
|
||||||
|
|||||||
@ -16,7 +16,7 @@ You can run Kontext using stable-diffusion.cpp with a GPU that has 6GB or even 4
|
|||||||
You can download the preconverted gguf weights from [FLUX.1-Kontext-dev-GGUF](https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF), this way you don't have to do the conversion yourself.
|
You can download the preconverted gguf weights from [FLUX.1-Kontext-dev-GGUF](https://huggingface.co/QuantStack/FLUX.1-Kontext-dev-GGUF), this way you don't have to do the conversion yourself.
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M convert -m ..\..\ComfyUI\models\unet\flux1-kontext-dev.safetensors -o ..\models\flux1-kontext-dev-q8_0.gguf -v --type q8_0
|
.\bin\Release\sd-cli.exe -M convert -m ..\..\ComfyUI\models\unet\flux1-kontext-dev.safetensors -o ..\models\flux1-kontext-dev-q8_0.gguf -v --type q8_0
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
@ -27,7 +27,7 @@ You can download the preconverted gguf weights from [FLUX.1-Kontext-dev-GGUF](ht
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -r .\flux1-dev-q8_0.png --diffusion-model ..\models\flux1-kontext-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "change 'flux.cpp' to 'kontext.cpp'" --cfg-scale 1.0 --sampling-method euler -v --clip-on-cpu
|
.\bin\Release\sd-cli.exe -r .\flux1-dev-q8_0.png --diffusion-model ..\models\flux1-kontext-dev-q8_0.gguf --vae ..\models\ae.sft --clip_l ..\models\clip_l.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -p "change 'flux.cpp' to 'kontext.cpp'" --cfg-scale 1.0 --sampling-method euler -v --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
Here's a simple example:
|
Here's a simple example:
|
||||||
|
|
||||||
```
|
```
|
||||||
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:lcm-lora-sdv1-5:1>" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
|
./bin/sd-cli -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:lcm-lora-sdv1-5:1>" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
|
||||||
```
|
```
|
||||||
|
|
||||||
| without LCM-LoRA (--cfg-scale 7) | with LCM-LoRA (--cfg-scale 1) |
|
| without LCM-LoRA (--cfg-scale 7) | with LCM-LoRA (--cfg-scale 1) |
|
||||||
|
|||||||
@ -7,7 +7,7 @@
|
|||||||
Here's a simple example:
|
Here's a simple example:
|
||||||
|
|
||||||
```
|
```
|
||||||
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
|
./bin/sd-cli -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
|
||||||
```
|
```
|
||||||
|
|
||||||
`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model
|
`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model
|
||||||
|
|||||||
19
docs/ovis_image.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# How to Use
|
||||||
|
|
||||||
|
## Download weights
|
||||||
|
|
||||||
|
- Download Ovis-Image-7B
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/Ovis-Image/tree/main/split_files/diffusion_models
|
||||||
|
- gguf: https://huggingface.co/leejet/Ovis-Image-7B-GGUF
|
||||||
|
- Download vae
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.1-schnell/tree/main
|
||||||
|
- Download Ovis 2.5
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/Ovis-Image/tree/main/split_files/text_encoders
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ovis_image-Q4_0.gguf --vae ..\..\ComfyUI\models\vae\ae.sft --llm ..\..\ComfyUI\models\text_encoders\ovis_2.5.safetensors -p "a lovely cat" --cfg-scale 5.0 -v --offload-to-cpu --diffusion-fa
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="ovis image example" src="../assets/ovis_image/example.png" />
|
||||||
@ -27,7 +27,7 @@ If on low memory GPUs (<= 8GB), recommend running with ```--vae-on-cpu``` option
|
|||||||
Example:
|
Example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bin/sd -m ../models/sdxlUnstableDiffusers_v11.safetensors --vae ../models/sdxl_vae.safetensors --photo-maker ../models/photomaker-v1.safetensors --pm-id-images-dir ../assets/photomaker_examples/scarletthead_woman -p "a girl img, retro futurism, retro game art style but extremely beautiful, intricate details, masterpiece, best quality, space-themed, cosmic, celestial, stars, galaxies, nebulas, planets, science fiction, highly detailed" -n "realistic, photo-realistic, worst quality, greyscale, bad anatomy, bad hands, error, text" --cfg-scale 5.0 --sampling-method euler -H 1024 -W 1024 --pm-style-strength 10 --vae-on-cpu --steps 50
|
bin/sd-cli -m ../models/sdxlUnstableDiffusers_v11.safetensors --vae ../models/sdxl_vae.safetensors --photo-maker ../models/photomaker-v1.safetensors --pm-id-images-dir ../assets/photomaker_examples/scarletthead_woman -p "a girl img, retro futurism, retro game art style but extremely beautiful, intricate details, masterpiece, best quality, space-themed, cosmic, celestial, stars, galaxies, nebulas, planets, science fiction, highly detailed" -n "realistic, photo-realistic, worst quality, greyscale, bad anatomy, bad hands, error, text" --cfg-scale 5.0 --sampling-method euler -H 1024 -W 1024 --pm-style-strength 10 --vae-on-cpu --steps 50
|
||||||
```
|
```
|
||||||
|
|
||||||
## PhotoMaker Version 2
|
## PhotoMaker Version 2
|
||||||
|
|||||||
@ -23,5 +23,5 @@ You can also convert weights in the formats `ckpt/safetensors/diffusers` to gguf
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
./bin/sd -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf -v --type q8_0
|
./bin/sd-cli -M convert -m ../models/v1-5-pruned-emaonly.safetensors -o ../models/v1-5-pruned-emaonly.q8_0.gguf -v --type q8_0
|
||||||
```
|
```
|
||||||
@ -14,7 +14,7 @@
|
|||||||
## Examples
|
## Examples
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\qwen-image-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --qwen2vl ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf -p '一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 “一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。”' --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu -H 1024 -W 1024 --diffusion-fa --flow-shift 3
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\qwen-image-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --llm ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf -p '一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 “一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。”' --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu -H 1024 -W 1024 --diffusion-fa --flow-shift 3
|
||||||
```
|
```
|
||||||
|
|
||||||
<img alt="qwen example" src="../assets/qwen/example.png" />
|
<img alt="qwen example" src="../assets/qwen/example.png" />
|
||||||
|
|||||||
@ -9,6 +9,9 @@
|
|||||||
- Qwen Image Edit 2509
|
- Qwen Image Edit 2509
|
||||||
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
|
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
|
||||||
- gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main
|
- gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main
|
||||||
|
- Qwen Image Edit 2511
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
|
||||||
|
- gguf: https://huggingface.co/unsloth/Qwen-Image-Edit-2511-GGUF/tree/main
|
||||||
- Download vae
|
- Download vae
|
||||||
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae
|
- safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae
|
||||||
- Download qwen_2.5_vl 7b
|
- Download qwen_2.5_vl 7b
|
||||||
@ -20,7 +23,7 @@
|
|||||||
### Qwen Image Edit
|
### Qwen Image Edit
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Qwen_Image_Edit-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --qwen2vl ..\..\ComfyUI\models\text_encoders\qwen_2.5_vl_7b.safetensors --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'edit.cpp'" --seed 1118877715456453
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Qwen_Image_Edit-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_2.5_vl_7b.safetensors --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'edit.cpp'" --seed 1118877715456453
|
||||||
```
|
```
|
||||||
|
|
||||||
<img alt="qwen_image_edit" src="../assets/qwen/qwen_image_edit.png" />
|
<img alt="qwen_image_edit" src="../assets/qwen/qwen_image_edit.png" />
|
||||||
@ -29,7 +32,17 @@
|
|||||||
### Qwen Image Edit 2509
|
### Qwen Image Edit 2509
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Qwen-Image-Edit-2509-Q4_K_S.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --qwen2vl ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf --qwen2vl_vision ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct.mmproj-Q8_0.gguf --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'Qwen Image Edit 2509'"
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\Qwen-Image-Edit-2509-Q4_K_S.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --llm ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct-Q8_0.gguf --llm_vision ..\..\ComfyUI\models\text_encoders\Qwen2.5-VL-7B-Instruct.mmproj-Q8_0.gguf --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'Qwen Image Edit 2509'"
|
||||||
```
|
```
|
||||||
|
|
||||||
<img alt="qwen_image_edit_2509" src="../assets/qwen/qwen_image_edit_2509.png" />
|
<img alt="qwen_image_edit_2509" src="../assets/qwen/qwen_image_edit_2509.png" />
|
||||||
|
|
||||||
|
### Qwen Image Edit 2511
|
||||||
|
|
||||||
|
To use the new Qwen Image Edit 2511 model, the `--qwen-image-zero-cond-t` flag must be enabled; otherwise, image editing quality will degrade significantly.
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\qwen-image-edit-2511-Q4_K_M.gguf --vae ..\..\ComfyUI\models\vae\qwen_image_vae.safetensors --llm ..\..\ComfyUI\models\text_encoders\qwen_2.5_vl_7b.safetensors --cfg-scale 2.5 --sampling-method euler -v --offload-to-cpu --diffusion-fa --flow-shift 3 -r ..\assets\flux\flux1-dev-q8_0.png -p "change 'flux.cpp' to 'edit.cpp'" --qwen-image-zero-cond-t
|
||||||
|
```
|
||||||
|
|
||||||
|
<img alt="qwen_image_edit_2511" src="../assets/qwen/qwen_image_edit_2511.png" />
|
||||||
14
docs/sd.md
@ -9,12 +9,12 @@
|
|||||||
### txt2img example
|
### txt2img example
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
./bin/sd -m ../models/sd-v1-4.ckpt -p "a lovely cat"
|
./bin/sd-cli -m ../models/sd-v1-4.ckpt -p "a lovely cat"
|
||||||
# ./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat"
|
# ./bin/sd-cli -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat"
|
||||||
# ./bin/sd -m ../models/sd_xl_base_1.0.safetensors --vae ../models/sdxl_vae-fp16-fix.safetensors -H 1024 -W 1024 -p "a lovely cat" -v
|
# ./bin/sd-cli -m ../models/sd_xl_base_1.0.safetensors --vae ../models/sdxl_vae-fp16-fix.safetensors -H 1024 -W 1024 -p "a lovely cat" -v
|
||||||
# ./bin/sd -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable Diffusion CPP\"' --cfg-scale 4.5 --sampling-method euler -v --clip-on-cpu
|
# ./bin/sd-cli -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable Diffusion CPP\"' --cfg-scale 4.5 --sampling-method euler -v --clip-on-cpu
|
||||||
# ./bin/sd --diffusion-model ../models/flux1-dev-q3_k.gguf --vae ../models/ae.sft --clip_l ../models/clip_l.safetensors --t5xxl ../models/t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v --clip-on-cpu
|
# ./bin/sd-cli --diffusion-model ../models/flux1-dev-q3_k.gguf --vae ../models/ae.sft --clip_l ../models/clip_l.safetensors --t5xxl ../models/t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v --clip-on-cpu
|
||||||
# ./bin/sd -m ..\models\sd3.5_large.safetensors --clip_l ..\models\clip_l.safetensors --clip_g ..\models\clip_g.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable diffusion 3.5 Large\"' --cfg-scale 4.5 --sampling-method euler -v --clip-on-cpu
|
# ./bin/sd-cli -m ..\models\sd3.5_large.safetensors --clip_l ..\models\clip_l.safetensors --clip_g ..\models\clip_g.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable diffusion 3.5 Large\"' --cfg-scale 4.5 --sampling-method euler -v --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
Using formats of different precisions will yield results of varying quality.
|
Using formats of different precisions will yield results of varying quality.
|
||||||
@ -29,7 +29,7 @@ Using formats of different precisions will yield results of varying quality.
|
|||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
./bin/sd -m ../models/sd-v1-4.ckpt -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
|
./bin/sd-cli -m ../models/sd-v1-4.ckpt -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
|
||||||
```
|
```
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
|
|||||||
@ -14,7 +14,7 @@
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -m ..\models\sd3.5_large.safetensors --clip_l ..\models\clip_l.safetensors --clip_g ..\models\clip_g.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable diffusion 3.5 Large\"' --cfg-scale 4.5 --sampling-method euler -v --clip-on-cpu
|
.\bin\Release\sd-cli.exe -m ..\models\sd3.5_large.safetensors --clip_l ..\models\clip_l.safetensors --clip_g ..\models\clip_g.safetensors --t5xxl ..\models\t5xxl_fp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable diffusion 3.5 Large\"' --cfg-scale 4.5 --sampling-method euler -v --clip-on-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||

|

|
||||||
@ -13,5 +13,27 @@ curl -L -O https://huggingface.co/madebyollin/taesd/resolve/main/diffusion_pytor
|
|||||||
- Specify the model path using the `--taesd PATH` parameter. example:
|
- Specify the model path using the `--taesd PATH` parameter. example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --taesd ../models/diffusion_pytorch_model.safetensors
|
sd-cli -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --taesd ../models/diffusion_pytorch_model.safetensors
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Qwen-Image and wan (TAEHV)
|
||||||
|
|
||||||
|
sd.cpp also supports [TAEHV](https://github.com/madebyollin/taehv) (#937), which can be used for Qwen-Image and wan.
|
||||||
|
|
||||||
|
- For **Qwen-Image, wan2.1, and wan2.2-A14B**, download the wan2.1 tae [safetensors weights](https://github.com/madebyollin/taehv/blob/main/safetensors/taew2_1.safetensors)
|
||||||
|
|
||||||
|
Or curl
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -O https://github.com/madebyollin/taehv/raw/refs/heads/main/safetensors/taew2_1.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
- For **wan2.2-TI2V-5B**, use the wan2.2 tae [safetensors weights](https://github.com/madebyollin/taehv/blob/main/safetensors/taew2_2.safetensors)
|
||||||
|
|
||||||
|
Or curl
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -O https://github.com/madebyollin/taehv/raw/refs/heads/main/safetensors/taew2_2.safetensors
|
||||||
|
```
|
||||||
|
|
||||||
|
Then simply replace `--vae xxx.safetensors` with `--tae xxx.safetensors` in the commands. If it still runs out of VRAM, add `--vae-conv-direct` to your command, though it might be slower.
|
||||||
|
|||||||
37
docs/wan.md
@ -39,6 +39,9 @@
|
|||||||
- safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors
|
- safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/vae/wan_2.1_vae.safetensors
|
||||||
- wan_2.2_vae (for Wan2.2 TI2V 5B only)
|
- wan_2.2_vae (for Wan2.2 TI2V 5B only)
|
||||||
- safetensors: https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors
|
- safetensors: https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/blob/main/split_files/vae/wan2.2_vae.safetensors
|
||||||
|
|
||||||
|
> The Wan models' VAE requires a lot of VRAM! If you do not have enough VRAM, please try tae instead, though the results may be poorer. For tae usage, please refer to [taesd](taesd.md)
|
||||||
|
|
||||||
- Download umt5_xxl
|
- Download umt5_xxl
|
||||||
- safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/text_encoders/umt5_xxl_fp16.safetensors
|
- safetensors: https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/blob/main/split_files/text_encoders/umt5_xxl_fp16.safetensors
|
||||||
- gguf: https://huggingface.co/city96/umt5-xxl-encoder-gguf/tree/main
|
- gguf: https://huggingface.co/city96/umt5-xxl-encoder-gguf/tree/main
|
||||||
@ -52,7 +55,7 @@
|
|||||||
### Wan2.1 T2V 1.3B
|
### Wan2.1 T2V 1.3B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1_t2v_1.3B_fp16.safetensors --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 33 --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1_t2v_1.3B_fp16.safetensors --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 33 --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_1.3B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_1.3B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -60,7 +63,7 @@
|
|||||||
### Wan2.1 T2V 14B
|
### Wan2.1 T2V 14B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-t2v-14b-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --video-frames 33 --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-t2v-14b-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --video-frames 33 --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_14B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_14B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -70,7 +73,7 @@
|
|||||||
### Wan2.1 I2V 14B
|
### Wan2.1 I2V 14B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-i2v-14b-480p-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf --clip_vision ..\..\ComfyUI\models\clip_vision\clip_vision_h.safetensors -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --video-frames 33 --offload-to-cpu -i ..\assets\cat_with_sd_cpp_42.png --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-i2v-14b-480p-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf --clip_vision ..\..\ComfyUI\models\clip_vision\clip_vision_h.safetensors -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --video-frames 33 --offload-to-cpu -i ..\assets\cat_with_sd_cpp_42.png --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_14B_i2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_14B_i2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -78,7 +81,7 @@
|
|||||||
### Wan2.2 T2V A14B
|
### Wan2.2 T2V A14B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --video-frames 33 --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --video-frames 33 --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.2_14B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.2_14B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -86,7 +89,7 @@
|
|||||||
### Wan2.2 I2V A14B
|
### Wan2.2 I2V A14B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --video-frames 33 --offload-to-cpu -i ..\assets\cat_with_sd_cpp_42.png --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --video-frames 33 --offload-to-cpu -i ..\assets\cat_with_sd_cpp_42.png --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.2_14B_i2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.2_14B_i2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -94,7 +97,7 @@
|
|||||||
### Wan2.2 T2V A14B T2I
|
### Wan2.2 T2V A14B T2I
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<img width="832" height="480" alt="Wan2.2_14B_t2i" src="../assets/wan/Wan2.2_14B_t2i.png" />
|
<img width="832" height="480" alt="Wan2.2_14B_t2i" src="../assets/wan/Wan2.2_14B_t2i.png" />
|
||||||
@ -102,7 +105,7 @@
|
|||||||
### Wan2.2 T2V 14B with LoRA
|
### Wan2.2 T2V 14B with LoRA
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat<lora:wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise:1><lora:|high_noise|wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise:1>" --cfg-scale 3.5 --sampling-method euler --steps 4 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 4 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --lora-model-dir ..\..\ComfyUI\models\loras --video-frames 33 --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat<lora:wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise:1><lora:|high_noise|wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise:1>" --cfg-scale 3.5 --sampling-method euler --steps 4 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 4 -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --offload-to-cpu --lora-model-dir ..\..\ComfyUI\models\loras --video-frames 33 --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.2_14B_t2v_lora.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.2_14B_t2v_lora.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -114,7 +117,7 @@
|
|||||||
#### T2V
|
#### T2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.2_ti2v_5B_fp16.safetensors --vae ..\..\ComfyUI\models\vae\wan2.2_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --offload-to-cpu --video-frames 33 --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.2_ti2v_5B_fp16.safetensors --vae ..\..\ComfyUI\models\vae\wan2.2_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --offload-to-cpu --video-frames 33 --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.2_5B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.2_5B_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -122,7 +125,7 @@
|
|||||||
#### I2V
|
#### I2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.2_ti2v_5B_fp16.safetensors --vae ..\..\ComfyUI\models\vae\wan2.2_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --offload-to-cpu --video-frames 33 -i ..\assets\cat_with_sd_cpp_42.png --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.2_ti2v_5B_fp16.safetensors --vae ..\..\ComfyUI\models\vae\wan2.2_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --offload-to-cpu --video-frames 33 -i ..\assets\cat_with_sd_cpp_42.png --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.2_5B_i2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.2_5B_i2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -130,7 +133,7 @@
|
|||||||
### Wan2.1 FLF2V 14B
|
### Wan2.1 FLF2V 14B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-flf2v-14b-720p-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf --clip_vision ..\..\ComfyUI\models\clip_vision\clip_vision_h.safetensors -p "glass flower blossom" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --video-frames 33 --offload-to-cpu --init-img ..\..\ComfyUI\input\start_image.png --end-img ..\..\ComfyUI\input\end_image.png --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-flf2v-14b-720p-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf --clip_vision ..\..\ComfyUI\models\clip_vision\clip_vision_h.safetensors -p "glass flower blossom" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --video-frames 33 --offload-to-cpu --init-img ..\..\ComfyUI\input\start_image.png --end-img ..\..\ComfyUI\input\end_image.png --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@ -139,7 +142,7 @@
|
|||||||
### Wan2.2 FLF2V 14B
|
### Wan2.2 FLF2V 14B
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -p "glass flower blossom" -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --video-frames 33 --offload-to-cpu --init-img ..\..\ComfyUI\input\start_image.png --end-img ..\..\ComfyUI\input\end_image.png --flow-shift 3.0
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-LowNoise-Q8_0.gguf --high-noise-diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.2-I2V-A14B-HighNoise-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf --cfg-scale 3.5 --sampling-method euler --steps 10 --high-noise-cfg-scale 3.5 --high-noise-sampling-method euler --high-noise-steps 8 -v -p "glass flower blossom" -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa --video-frames 33 --offload-to-cpu --init-img ..\..\ComfyUI\input\start_image.png --end-img ..\..\ComfyUI\input\end_image.png --flow-shift 3.0
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.2_14B_flf2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.2_14B_flf2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -149,7 +152,7 @@
|
|||||||
#### T2V
|
#### T2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 1 --offload-to-cpu
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 1 --offload-to-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_1.3B_vace_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_1.3B_vace_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -158,7 +161,7 @@
|
|||||||
#### R2V
|
#### R2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa -i ..\assets\cat_with_sd_cpp_42.png --video-frames 33 --offload-to-cpu
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa -i ..\assets\cat_with_sd_cpp_42.png --video-frames 33 --offload-to-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_1.3B_vace_r2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_1.3B_vace_r2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -169,7 +172,7 @@
|
|||||||
```
|
```
|
||||||
mkdir post+depth
|
mkdir post+depth
|
||||||
ffmpeg -i ..\..\ComfyUI\input\post+depth.mp4 -qscale:v 1 -vf fps=8 post+depth\frame_%04d.jpg
|
ffmpeg -i ..\..\ComfyUI\input\post+depth.mp4 -qscale:v 1 -vf fps=8 post+depth\frame_%04d.jpg
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "The girl is dancing in a sea of flowers, slowly moving her hands. There is a close - up shot of her upper body. The character is surrounded by other transparent glass flowers in the style of Nicoletta Ceccoli, creating a beautiful, surreal, and emotionally expressive movie scene with a white. transparent feel and a dreamyl atmosphere." --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa -i ..\..\ComfyUI\input\dance_girl.jpg --control-video ./post+depth --video-frames 33 --offload-to-cpu
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\wan2.1-vace-1.3b-q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "The girl is dancing in a sea of flowers, slowly moving her hands. There is a close - up shot of her upper body. The character is surrounded by other transparent glass flowers in the style of Nicoletta Ceccoli, creating a beautiful, surreal, and emotionally expressive movie scene with a white. transparent feel and a dreamyl atmosphere." --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa -i ..\..\ComfyUI\input\dance_girl.jpg --control-video ./post+depth --video-frames 33 --offload-to-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_1.3B_vace_v2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_1.3B_vace_v2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -179,7 +182,7 @@ ffmpeg -i ..\..\ComfyUI\input\post+depth.mp4 -qscale:v 1 -vf fps=8 post+depth\fr
|
|||||||
#### T2V
|
#### T2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 33 --offload-to-cpu
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa --video-frames 33 --offload-to-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_14B_vace_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_14B_vace_t2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -188,7 +191,7 @@ ffmpeg -i ..\..\ComfyUI\input\post+depth.mp4 -qscale:v 1 -vf fps=8 post+depth\fr
|
|||||||
#### R2V
|
#### R2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa -i ..\assets\cat_with_sd_cpp_42.png --video-frames 33 --offload-to-cpu
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "a lovely cat" --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 832 -H 480 --diffusion-fa -i ..\assets\cat_with_sd_cpp_42.png --video-frames 33 --offload-to-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_14B_vace_r2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_14B_vace_r2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
@ -198,7 +201,7 @@ ffmpeg -i ..\..\ComfyUI\input\post+depth.mp4 -qscale:v 1 -vf fps=8 post+depth\fr
|
|||||||
#### V2V
|
#### V2V
|
||||||
|
|
||||||
```
|
```
|
||||||
.\bin\Release\sd.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "The girl is dancing in a sea of flowers, slowly moving her hands. There is a close - up shot of her upper body. The character is surrounded by other transparent glass flowers in the style of Nicoletta Ceccoli, creating a beautiful, surreal, and emotionally expressive movie scene with a white. transparent feel and a dreamyl atmosphere." --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa -i ..\..\ComfyUI\input\dance_girl.jpg --control-video ./post+depth --video-frames 33 --offload-to-cpu
|
.\bin\Release\sd-cli.exe -M vid_gen --diffusion-model ..\..\ComfyUI\models\diffusion_models\Wan2.1_14B_VACE-Q8_0.gguf --vae ..\..\ComfyUI\models\vae\wan_2.1_vae.safetensors --t5xxl ..\..\ComfyUI\models\text_encoders\umt5-xxl-encoder-Q8_0.gguf -p "The girl is dancing in a sea of flowers, slowly moving her hands. There is a close - up shot of her upper body. The character is surrounded by other transparent glass flowers in the style of Nicoletta Ceccoli, creating a beautiful, surreal, and emotionally expressive movie scene with a white. transparent feel and a dreamyl atmosphere." --cfg-scale 6.0 --sampling-method euler -v -n "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部, 畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" -W 480 -H 832 --diffusion-fa -i ..\..\ComfyUI\input\dance_girl.jpg --control-video ./post+depth --video-frames 33 --offload-to-cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
<video src=../assets/wan/Wan2.1_14B_vace_v2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
<video src=../assets/wan/Wan2.1_14B_vace_v2v.mp4 controls="controls" muted="muted" type="video/mp4"></video>
|
||||||
|
|||||||
41
docs/z_image.md
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# How to Use
|
||||||
|
|
||||||
|
You can run Z-Image with stable-diffusion.cpp on GPUs with 4GB of VRAM — or even less.
|
||||||
|
|
||||||
|
## Download weights
|
||||||
|
|
||||||
|
- Download Z-Image-Turbo
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/z_image_turbo/tree/main/split_files/diffusion_models
|
||||||
|
- gguf: https://huggingface.co/leejet/Z-Image-Turbo-GGUF/tree/main
|
||||||
|
- Download Z-Image
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/z_image/tree/main/split_files/diffusion_models
|
||||||
|
- gguf: https://huggingface.co/unsloth/Z-Image-GGUF/tree/main
|
||||||
|
- Download vae
|
||||||
|
- safetensors: https://huggingface.co/black-forest-labs/FLUX.1-schnell/tree/main
|
||||||
|
- Download Qwen3 4b
|
||||||
|
- safetensors: https://huggingface.co/Comfy-Org/z_image_turbo/tree/main/split_files/text_encoders
|
||||||
|
- gguf: https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF/tree/main
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
### Z-Image-Turbo
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model z_image_turbo-Q3_K.gguf --vae ..\..\ComfyUI\models\vae\ae.sft --llm ..\..\ComfyUI\models\text_encoders\Qwen3-4B-Instruct-2507-Q4_K_M.gguf -p "A cinematic, melancholic photograph of a solitary hooded figure walking through a sprawling, rain-slicked metropolis at night. The city lights are a chaotic blur of neon orange and cool blue, reflecting on the wet asphalt. The scene evokes a sense of being a single component in a vast machine. Superimposed over the image in a sleek, modern, slightly glitched font is the philosophical quote: 'THE CITY IS A CIRCUIT BOARD, AND I AM A BROKEN TRANSISTOR.' -- moody, atmospheric, profound, dark academic" --cfg-scale 1.0 -v --offload-to-cpu --diffusion-fa -H 1024 -W 512
|
||||||
|
```
|
||||||
|
|
||||||
|
<img width="256" alt="z-image example" src="../assets/z_image/q3_K.png" />
|
||||||
|
|
||||||
|
### Z-Image-Base
|
||||||
|
|
||||||
|
```
|
||||||
|
.\bin\Release\sd-cli.exe --diffusion-model ..\..\ComfyUI\models\diffusion_models\z_image_bf16.safetensors --vae ..\..\ComfyUI\models\vae\ae.sft --llm ..\..\ComfyUI\models\text_encoders\qwen_3_4b.safetensors -p "A cinematic, melancholic photograph of a solitary hooded figure walking through a sprawling, rain-slicked metropolis at night. The city lights are a chaotic blur of neon orange and cool blue, reflecting on the wet asphalt. The scene evokes a sense of being a single component in a vast machine. Superimposed over the image in a sleek, modern, slightly glitched font is the philosophical quote: 'THE CITY IS A CIRCUIT BOARD, AND I AM A BROKEN TRANSISTOR.' -- moody, atmospheric, profound, dark academic" --cfg-scale 5.0 -v --offload-to-cpu --diffusion-fa -H 1024 -W 512
|
||||||
|
```
|
||||||
|
|
||||||
|
<img width="256" alt="z-image example" src="../assets/z_image/base_bf16.png" />
|
||||||
|
|
||||||
|
## Comparison of Different Quantization Types
|
||||||
|
|
||||||
|
| bf16 | q8_0 | q6_K | q5_0 | q4_K | q4_0 | q3_K | q2_K|
|
||||||
|
|---|---|---|---|---|---|---|---|
|
||||||
|
| <img width="256" alt="bf16" src="../assets/z_image/bf16.png" /> | <img width="256" alt="q8_0" src="../assets/z_image/q8_0.png" /> | <img width="256" alt="q6_K" src="../assets/z_image/q6_K.png" /> | <img width="256" alt="q5_0" src="../assets/z_image/q5_0.png" /> | <img width="256" alt="q4_K" src="../assets/z_image/q4_K.png" /> | <img width="256" alt="q4_0" src="../assets/z_image/q4_0.png" /> | <img width="256" alt="q3_K" src="../assets/z_image/q3_K.png" /> | <img width="256" alt="q2_K" src="../assets/z_image/q2_K.png" /> |
|
||||||
@ -1,3 +1,4 @@
|
|||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
add_subdirectory(cli)
|
add_subdirectory(cli)
|
||||||
|
add_subdirectory(server)
|
||||||
@ -1,4 +1,4 @@
|
|||||||
set(TARGET sd)
|
set(TARGET sd-cli)
|
||||||
|
|
||||||
add_executable(${TARGET} main.cpp)
|
add_executable(${TARGET} main.cpp)
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
|
|||||||
@ -1,43 +1,94 @@
|
|||||||
# Run
|
# Run
|
||||||
|
|
||||||
```
|
```
|
||||||
usage: ./bin/sd [options]
|
usage: ./bin/sd-cli [options]
|
||||||
|
|
||||||
Options:
|
CLI Options:
|
||||||
|
-o, --output <string> path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
|
||||||
|
./output.png) (eg. output_%03d.png)
|
||||||
|
--preview-path <string> path to write preview image to (default: ./preview.png)
|
||||||
|
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
||||||
|
every step)
|
||||||
|
  --output-begin-idx <int>                 starting index for output image sequence, must be non-negative (default: 0 if %d is specified in the output path, 1 otherwise)
|
||||||
|
--canny apply canny preprocessor (edge detection)
|
||||||
|
--convert-name convert tensor name (for convert mode)
|
||||||
|
-v, --verbose print extra info
|
||||||
|
--color colors the logging tags according to level
|
||||||
|
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
|
||||||
|
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
|
||||||
|
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
|
||||||
|
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
|
||||||
|
Context Options:
|
||||||
-m, --model <string> path to full model
|
-m, --model <string> path to full model
|
||||||
--clip_l <string> path to the clip-l text encoder
|
--clip_l <string> path to the clip-l text encoder
|
||||||
--clip_g <string> path to the clip-g text encoder
|
--clip_g <string> path to the clip-g text encoder
|
||||||
--clip_vision <string> path to the clip-vision encoder
|
--clip_vision <string> path to the clip-vision encoder
|
||||||
--t5xxl <string> path to the t5xxl text encoder
|
--t5xxl <string> path to the t5xxl text encoder
|
||||||
--qwen2vl <string> path to the qwen2vl text encoder
|
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image, mistral-small3.2 for flux2, ...)
|
||||||
--qwen2vl_vision <string> path to the qwen2vl vit
|
--llm_vision <string> path to the llm vit
|
||||||
|
--qwen2vl <string> alias of --llm. Deprecated.
|
||||||
|
--qwen2vl_vision <string> alias of --llm_vision. Deprecated.
|
||||||
--diffusion-model <string> path to the standalone diffusion model
|
--diffusion-model <string> path to the standalone diffusion model
|
||||||
--high-noise-diffusion-model <string> path to the standalone high noise diffusion model
|
--high-noise-diffusion-model <string> path to the standalone high noise diffusion model
|
||||||
--vae <string> path to standalone vae model
|
--vae <string> path to standalone vae model
|
||||||
--taesd <string> path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
|
--taesd <string> path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
|
||||||
|
--tae <string> alias of --taesd
|
||||||
--control-net <string> path to control net model
|
--control-net <string> path to control net model
|
||||||
--embd-dir <string> embeddings directory
|
--embd-dir <string> embeddings directory
|
||||||
--lora-model-dir <string> lora model directory
|
--lora-model-dir <string> lora model directory
|
||||||
-i, --init-img <string> path to the init image
|
|
||||||
--end-img <string> path to the end image, required by flf2v
|
|
||||||
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
|
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
|
||||||
--photo-maker <string> path to PHOTOMAKER model
|
--photo-maker <string> path to PHOTOMAKER model
|
||||||
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir
|
--upscale-model <string> path to esrgan model.
|
||||||
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
|
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of
|
||||||
|
CPU physical cores
|
||||||
|
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||||
|
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
||||||
|
--vae-tiling process vae in tiles to reduce memory usage
|
||||||
|
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||||
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||||
|
--mmap whether to memory-map model
|
||||||
|
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||||
|
--clip-on-cpu keep clip in cpu (for low vram)
|
||||||
|
--vae-on-cpu keep vae in cpu (for low vram)
|
||||||
|
--fa use flash attention
|
||||||
|
--diffusion-fa use flash attention in the diffusion model only
|
||||||
|
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
||||||
|
--vae-conv-direct use ggml_conv2d_direct in the vae model
|
||||||
|
--circular enable circular padding for convolutions
|
||||||
|
--circularx enable circular RoPE wrapping on x-axis (width) only
|
||||||
|
--circulary enable circular RoPE wrapping on y-axis (height) only
|
||||||
|
--chroma-disable-dit-mask disable dit mask for chroma
|
||||||
|
--qwen-image-zero-cond-t enable zero_cond_t for qwen image
|
||||||
|
--chroma-enable-t5-mask enable t5 mask for chroma
|
||||||
|
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
||||||
|
type of the weight file
|
||||||
|
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
|
||||||
|
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
|
||||||
|
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]
|
||||||
|
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
|
||||||
|
contain any quantized parameters, the at_runtime mode will be used; otherwise,
|
||||||
|
immediately will be used.The immediately mode may have precision and
|
||||||
|
compatibility issues with quantized parameters, but it usually offers faster inference
|
||||||
|
speed and, in some cases, lower memory usage. The at_runtime mode, on the
|
||||||
|
other hand, is exactly the opposite.
|
||||||
|
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
|
||||||
|
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1
|
||||||
|
(overrides --vae-tile-size)
|
||||||
|
|
||||||
|
Generation Options:
|
||||||
|
-p, --prompt <string> the prompt to render
|
||||||
|
-n, --negative-prompt <string> the negative prompt (default: "")
|
||||||
|
-i, --init-img <string> path to the init image
|
||||||
|
--end-img <string> path to the end image, required by flf2v
|
||||||
--mask <string> path to the mask image
|
--mask <string> path to the mask image
|
||||||
--control-image <string> path to control image, control net
|
--control-image <string> path to control image, control net
|
||||||
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in
|
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in
|
||||||
lexicographical (character) order. For example, if the control video path is
|
lexicographical (character) order. For example, if the control video path is
|
||||||
`frames`, the directory contain images such as 00.png, 01.png, ... etc.
|
`frames`, the directory contain images such as 00.png, 01.png, ... etc.
|
||||||
-o, --output <string> path to write result image to (default: ./output.png)
|
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir
|
||||||
-p, --prompt <string> the prompt to render
|
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
|
||||||
-n, --negative-prompt <string> the negative prompt (default: "")
|
|
||||||
--preview-path <string> path to write preview image to (default: ./preview.png)
|
|
||||||
--easycache <string> enable EasyCache for DiT models, accepts optional "threshold,start_percent,end_percent" values (defaults to 0.2,0.15,0.95)
|
|
||||||
--upscale-model <string> path to esrgan model.
|
|
||||||
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of
|
|
||||||
CPU physical cores
|
|
||||||
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
|
|
||||||
-H, --height <int> image height, in pixel space (default: 512)
|
-H, --height <int> image height, in pixel space (default: 512)
|
||||||
-W, --width <int> image width, in pixel space (default: 512)
|
-W, --width <int> image width, in pixel space (default: 512)
|
||||||
--steps <int> number of sample steps (default: 20)
|
--steps <int> number of sample steps (default: 20)
|
||||||
@ -45,13 +96,12 @@ Options:
|
|||||||
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified,
|
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified,
|
||||||
will be 1 for SD1.x, 2 for SD2.x
|
will be 1 for SD1.x, 2 for SD2.x
|
||||||
-b, --batch-count <int> batch count
|
-b, --batch-count <int> batch count
|
||||||
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
|
||||||
--video-frames <int> video frames (default: 1)
|
--video-frames <int> video frames (default: 1)
|
||||||
--fps <int> fps (default: 24)
|
--fps <int> fps (default: 24)
|
||||||
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
|
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
|
||||||
NitroSD-Vibrant
|
NitroSD-Vibrant
|
||||||
--preview-interval <int> interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
|
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
|
||||||
every step)
|
--upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128)
|
||||||
--cfg-scale <float> unconditional guidance scale: (default: 7.0)
|
--cfg-scale <float> unconditional guidance scale: (default: 7.0)
|
||||||
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
||||||
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
|
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
|
||||||
@ -60,6 +110,7 @@ Options:
|
|||||||
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||||
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||||
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
|
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
|
||||||
|
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||||
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||||
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||||
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
||||||
@ -71,54 +122,28 @@ Options:
|
|||||||
--pm-style-strength <float>
|
--pm-style-strength <float>
|
||||||
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||||
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1
|
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1
|
||||||
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
|
||||||
--vace-strength <float> wan vace strength
|
--vace-strength <float> wan vace strength
|
||||||
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
|
||||||
--vae-tiling process vae in tiles to reduce memory usage
|
|
||||||
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
|
||||||
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
|
||||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
|
||||||
--clip-on-cpu keep clip in cpu (for low vram)
|
|
||||||
--vae-on-cpu keep vae in cpu (for low vram)
|
|
||||||
--diffusion-fa use flash attention in the diffusion model
|
|
||||||
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
|
||||||
--vae-conv-direct use ggml_conv2d_direct in the vae model
|
|
||||||
--canny apply canny preprocessor (edge detection)
|
|
||||||
-v, --verbose print extra info
|
|
||||||
--color colors the logging tags according to level
|
|
||||||
--chroma-disable-dit-mask disable dit mask for chroma
|
|
||||||
--chroma-enable-t5-mask enable t5 mask for chroma
|
|
||||||
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
||||||
--disable-auto-resize-ref-image disable auto resize of ref images
|
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||||
--taesd-preview-only prevents usage of taesd for decoding the final image. (for use with --preview tae)
|
|
||||||
--preview-noisy enables previewing noisy inputs of the models rather than the denoised outputs
|
|
||||||
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
|
|
||||||
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
|
||||||
type of the weight file
|
|
||||||
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
|
|
||||||
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
|
|
||||||
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||||
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||||
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
|
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
|
||||||
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow]
|
otherwise)
|
||||||
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
|
|
||||||
contain any quantized parameters, the at_runtime mode will be used; otherwise,
|
|
||||||
immediately will be used.The immediately mode may have precision and
|
|
||||||
compatibility issues with quantized parameters, but it usually offers faster inference
|
|
||||||
speed and, in some cases, lower memory usage. The at_runtime mode, on the other
|
|
||||||
hand, is exactly the opposite.
|
|
||||||
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple], default:
|
|
||||||
discrete
|
|
||||||
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
|
||||||
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||||
ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise
|
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
|
||||||
--high-noise-scheduler (high noise) denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform,
|
euler_a otherwise
|
||||||
simple], default: discrete
|
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
|
||||||
|
kl_optimal, lcm, bong_tangent], default: discrete
|
||||||
|
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
|
||||||
|
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
||||||
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
||||||
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
||||||
-h, --help show this help message and exit
|
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level),
|
||||||
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
|
'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
|
||||||
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1
|
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
||||||
(overrides --vae-tile-size)
|
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=;
|
||||||
--preview preview method. must be one of the following [none, proj, tae, vae] (default is none)
|
spectrum: w=,m=,lam=,window=,flex=,warmup=,stop=. Examples:
|
||||||
|
"threshold=0.25" or "threshold=1.5,reset=0" or "w=0.4,window=2"
|
||||||
|
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||||
|
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||||
```
|
```
|
||||||
|
|||||||
@ -172,9 +172,9 @@ int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int
|
|||||||
|
|
||||||
// Write '00dc' chunk (video frame)
|
// Write '00dc' chunk (video frame)
|
||||||
fwrite("00dc", 4, 1, f);
|
fwrite("00dc", 4, 1, f);
|
||||||
write_u32_le(f, jpeg_data.size);
|
write_u32_le(f, (uint32_t)jpeg_data.size);
|
||||||
index[i].offset = ftell(f) - 8;
|
index[i].offset = ftell(f) - 8;
|
||||||
index[i].size = jpeg_data.size;
|
index[i].size = (uint32_t)jpeg_data.size;
|
||||||
fwrite(jpeg_data.buf, 1, jpeg_data.size, f);
|
fwrite(jpeg_data.buf, 1, jpeg_data.size, f);
|
||||||
|
|
||||||
// Align to even byte size
|
// Align to even byte size
|
||||||
|
|||||||
2096
examples/common/common.hpp
Normal file
73
examples/server/CMakeLists.txt
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
set(TARGET sd-server)
|
||||||
|
|
||||||
|
option(SD_SERVER_BUILD_FRONTEND "Build server frontend with pnpm" ON)
|
||||||
|
|
||||||
|
set(FRONTEND_DIR "${CMAKE_CURRENT_SOURCE_DIR}/frontend")
|
||||||
|
set(GENERATED_HTML_HEADER "${FRONTEND_DIR}/dist/gen_index_html.h")
|
||||||
|
|
||||||
|
set(HAVE_FRONTEND_BUILD OFF)
|
||||||
|
|
||||||
|
if(SD_SERVER_BUILD_FRONTEND AND EXISTS "${FRONTEND_DIR}")
|
||||||
|
if(WIN32)
|
||||||
|
find_program(PNPM_EXECUTABLE NAMES pnpm.cmd pnpm)
|
||||||
|
else()
|
||||||
|
find_program(PNPM_EXECUTABLE NAMES pnpm)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(PNPM_EXECUTABLE)
|
||||||
|
message(STATUS "Frontend dir found: ${FRONTEND_DIR}")
|
||||||
|
message(STATUS "pnpm found: ${PNPM_EXECUTABLE}")
|
||||||
|
|
||||||
|
set(HAVE_FRONTEND_BUILD ON)
|
||||||
|
|
||||||
|
add_custom_target(${TARGET}_frontend_install
|
||||||
|
COMMAND "${PNPM_EXECUTABLE}" -C "${FRONTEND_DIR}" install
|
||||||
|
WORKING_DIRECTORY "${FRONTEND_DIR}"
|
||||||
|
COMMENT "Installing frontend dependencies"
|
||||||
|
VERBATIM
|
||||||
|
)
|
||||||
|
|
||||||
|
add_custom_target(${TARGET}_frontend_build
|
||||||
|
COMMAND "${PNPM_EXECUTABLE}" -C "${FRONTEND_DIR}" run build
|
||||||
|
WORKING_DIRECTORY "${FRONTEND_DIR}"
|
||||||
|
COMMENT "Building frontend"
|
||||||
|
VERBATIM
|
||||||
|
)
|
||||||
|
|
||||||
|
add_custom_target(${TARGET}_frontend_header
|
||||||
|
COMMAND "${PNPM_EXECUTABLE}" -C "${FRONTEND_DIR}" run build:header
|
||||||
|
WORKING_DIRECTORY "${FRONTEND_DIR}"
|
||||||
|
COMMENT "Generating gen_index_html.h"
|
||||||
|
VERBATIM
|
||||||
|
)
|
||||||
|
|
||||||
|
add_dependencies(${TARGET}_frontend_build ${TARGET}_frontend_install)
|
||||||
|
add_dependencies(${TARGET}_frontend_header ${TARGET}_frontend_build)
|
||||||
|
|
||||||
|
add_custom_target(${TARGET}_frontend
|
||||||
|
DEPENDS ${TARGET}_frontend_header
|
||||||
|
)
|
||||||
|
|
||||||
|
set_source_files_properties("${GENERATED_HTML_HEADER}" PROPERTIES GENERATED TRUE)
|
||||||
|
else()
|
||||||
|
message(WARNING "pnpm not found, frontend build disabled")
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
message(STATUS "Frontend disabled or directory not found: ${FRONTEND_DIR}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_executable(${TARGET} main.cpp)
|
||||||
|
|
||||||
|
if(HAVE_FRONTEND_BUILD)
|
||||||
|
add_dependencies(${TARGET} ${TARGET}_frontend)
|
||||||
|
target_sources(${TARGET} PRIVATE "${GENERATED_HTML_HEADER}")
|
||||||
|
target_include_directories(${TARGET} PRIVATE "${FRONTEND_DIR}/dist")
|
||||||
|
target_compile_definitions(${TARGET} PRIVATE HAVE_INDEX_HTML)
|
||||||
|
message(STATUS "HAVE_INDEX_HTML enabled")
|
||||||
|
else()
|
||||||
|
message(STATUS "HAVE_INDEX_HTML disabled")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
|
||||||
227
examples/server/README.md
Normal file
@ -0,0 +1,227 @@
|
|||||||
|
# Frontend
|
||||||
|
|
||||||
|
## Build with Frontend
|
||||||
|
|
||||||
|
The server can optionally build the web frontend and embed it into the binary as `gen_index_html.h`.
|
||||||
|
|
||||||
|
### Requirements
|
||||||
|
|
||||||
|
Install the following tools:
|
||||||
|
|
||||||
|
* **Node.js** ≥ 22.18
|
||||||
|
https://nodejs.org/
|
||||||
|
|
||||||
|
* **pnpm** ≥ 10
|
||||||
|
Install via npm:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install -g pnpm
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify installation:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node -v
|
||||||
|
pnpm -v
|
||||||
|
```
|
||||||
|
|
||||||
|
### Install frontend dependencies
|
||||||
|
|
||||||
|
Go to the frontend directory and install dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd examples/server/frontend
|
||||||
|
pnpm install
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build the server with CMake
|
||||||
|
|
||||||
|
Enable the frontend build option when configuring CMake:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cmake -B build -DSD_SERVER_BUILD_FRONTEND=ON
|
||||||
|
cmake --build build --config Release
|
||||||
|
```
|
||||||
|
|
||||||
|
If `pnpm` is available, the build system will automatically run:
|
||||||
|
|
||||||
|
```
|
||||||
|
pnpm run build
|
||||||
|
pnpm run build:header
|
||||||
|
```
|
||||||
|
|
||||||
|
and embed the generated frontend into the server binary.
|
||||||
|
|
||||||
|
## Frontend Repository
|
||||||
|
|
||||||
|
The web frontend is maintained in a **separate repository**, https://github.com/leejet/stable-ui.
|
||||||
|
|
||||||
|
If you want to modify the UI or frontend logic, please submit pull requests to the **frontend repository**.
|
||||||
|
|
||||||
|
This repository (`stable-diffusion.cpp`) only vendors the frontend periodically. Changes from the frontend repo are synchronized:
|
||||||
|
|
||||||
|
* approximately **every 1–2 weeks**, or
|
||||||
|
* when there are **major frontend updates**
|
||||||
|
|
||||||
|
Because of this, frontend changes will **not appear here immediately** after being merged upstream.
|
||||||
|
|
||||||
|
## Using an external frontend
|
||||||
|
|
||||||
|
By default, the server uses the **embedded frontend** generated during the build (`gen_index_html.h`).
|
||||||
|
|
||||||
|
You can also serve a custom frontend file instead of the embedded one by using:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--serve-html-path <path-to-index.html>
|
||||||
|
```
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sd-server --serve-html-path ./index.html
|
||||||
|
```
|
||||||
|
|
||||||
|
In this case, the server will load and serve the specified `index.html` file instead of the embedded frontend. This is useful when:
|
||||||
|
|
||||||
|
* developing or testing frontend changes
|
||||||
|
* using a custom UI
|
||||||
|
* avoiding rebuilding the binary after frontend modifications
|
||||||
|
|
||||||
|
# Run
|
||||||
|
|
||||||
|
```
|
||||||
|
usage: ./bin/sd-server [options]
|
||||||
|
|
||||||
|
Svr Options:
|
||||||
|
-l, --listen-ip <string> server listen ip (default: 127.0.0.1)
|
||||||
|
--serve-html-path <string> path to HTML file to serve at root (optional)
|
||||||
|
--listen-port <int> server listen port (default: 1234)
|
||||||
|
-v, --verbose print extra info
|
||||||
|
--color colors the logging tags according to level
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
|
||||||
|
Context Options:
|
||||||
|
-m, --model <string> path to full model
|
||||||
|
--clip_l <string> path to the clip-l text encoder
|
||||||
|
--clip_g <string> path to the clip-g text encoder
|
||||||
|
--clip_vision <string> path to the clip-vision encoder
|
||||||
|
--t5xxl <string> path to the t5xxl text encoder
|
||||||
|
--llm <string> path to the llm text encoder. For example: (qwenvl2.5 for qwen-image, mistral-small3.2 for flux2, ...)
|
||||||
|
--llm_vision <string> path to the llm vit
|
||||||
|
--qwen2vl <string> alias of --llm. Deprecated.
|
||||||
|
--qwen2vl_vision <string> alias of --llm_vision. Deprecated.
|
||||||
|
--diffusion-model <string> path to the standalone diffusion model
|
||||||
|
--high-noise-diffusion-model <string> path to the standalone high noise diffusion model
|
||||||
|
--vae <string> path to standalone vae model
|
||||||
|
--taesd <string> path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
|
||||||
|
--tae <string> alias of --taesd
|
||||||
|
--control-net <string> path to control net model
|
||||||
|
--embd-dir <string> embeddings directory
|
||||||
|
--lora-model-dir <string> lora model directory
|
||||||
|
--tensor-type-rules <string> weight type per tensor pattern (example: "^vae\.=f16,model\.=q8_0")
|
||||||
|
--photo-maker <string> path to PHOTOMAKER model
|
||||||
|
--upscale-model <string> path to esrgan model.
|
||||||
|
-t, --threads <int> number of threads to use during computation (default: -1). If threads <= 0, then threads will be set to the number of
|
||||||
|
CPU physical cores
|
||||||
|
--chroma-t5-mask-pad <int> t5 mask pad size of chroma
|
||||||
|
--vae-tile-overlap <float> tile overlap for vae tiling, in fraction of tile size (default: 0.5)
|
||||||
|
--vae-tiling process vae in tiles to reduce memory usage
|
||||||
|
--force-sdxl-vae-conv-scale force use of conv scale on sdxl vae
|
||||||
|
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
|
||||||
|
--mmap whether to memory-map model
|
||||||
|
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||||
|
--clip-on-cpu keep clip in cpu (for low vram)
|
||||||
|
--vae-on-cpu keep vae in cpu (for low vram)
|
||||||
|
--fa use flash attention
|
||||||
|
--diffusion-fa use flash attention in the diffusion model only
|
||||||
|
--diffusion-conv-direct use ggml_conv2d_direct in the diffusion model
|
||||||
|
--vae-conv-direct use ggml_conv2d_direct in the vae model
|
||||||
|
--circular enable circular padding for convolutions
|
||||||
|
--circularx enable circular RoPE wrapping on x-axis (width) only
|
||||||
|
--circulary enable circular RoPE wrapping on y-axis (height) only
|
||||||
|
--chroma-disable-dit-mask disable dit mask for chroma
|
||||||
|
--qwen-image-zero-cond-t enable zero_cond_t for qwen image
|
||||||
|
--chroma-enable-t5-mask enable t5 mask for chroma
|
||||||
|
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
|
||||||
|
type of the weight file
|
||||||
|
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
|
||||||
|
--sampler-rng sampler RNG, one of [std_default, cuda, cpu]. If not specified, use --rng
|
||||||
|
--prediction prediction type override, one of [eps, v, edm_v, sd3_flow, flux_flow, flux2_flow]
|
||||||
|
--lora-apply-mode the way to apply LoRA, one of [auto, immediately, at_runtime], default is auto. In auto mode, if the model weights
|
||||||
|
contain any quantized parameters, the at_runtime mode will be used; otherwise,
|
||||||
|
immediately will be used.The immediately mode may have precision and
|
||||||
|
compatibility issues with quantized parameters, but it usually offers faster inference
|
||||||
|
speed and, in some cases, lower memory usage. The at_runtime mode, on the
|
||||||
|
other hand, is exactly the opposite.
|
||||||
|
--vae-tile-size tile size for vae tiling, format [X]x[Y] (default: 32x32)
|
||||||
|
--vae-relative-tile-size relative tile size for vae tiling, format [X]x[Y], in fraction of image size if < 1, in number of tiles per dim if >=1
|
||||||
|
(overrides --vae-tile-size)
|
||||||
|
|
||||||
|
Default Generation Options:
|
||||||
|
-p, --prompt <string> the prompt to render
|
||||||
|
-n, --negative-prompt <string> the negative prompt (default: "")
|
||||||
|
-i, --init-img <string> path to the init image
|
||||||
|
--end-img <string> path to the end image, required by flf2v
|
||||||
|
--mask <string> path to the mask image
|
||||||
|
--control-image <string> path to control image, control net
|
||||||
|
--control-video <string> path to control video frames, It must be a directory path. The video frames inside should be stored as images in
|
||||||
|
lexicographical (character) order. For example, if the control video path is
|
||||||
|
`frames`, the directory contain images such as 00.png, 01.png, ... etc.
|
||||||
|
--pm-id-images-dir <string> path to PHOTOMAKER input id images dir
|
||||||
|
--pm-id-embed-path <string> path to PHOTOMAKER v2 id embed
|
||||||
|
-H, --height <int> image height, in pixel space (default: 512)
|
||||||
|
-W, --width <int> image width, in pixel space (default: 512)
|
||||||
|
--steps <int> number of sample steps (default: 20)
|
||||||
|
--high-noise-steps <int> (high noise) number of sample steps (default: -1 = auto)
|
||||||
|
--clip-skip <int> ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified,
|
||||||
|
will be 1 for SD1.x, 2 for SD2.x
|
||||||
|
-b, --batch-count <int> batch count
|
||||||
|
--video-frames <int> video frames (default: 1)
|
||||||
|
--fps <int> fps (default: 24)
|
||||||
|
--timestep-shift <int> shift timestep for NitroFusion models (default: 0). recommended N for NitroSD-Realism around 250 and 500 for
|
||||||
|
NitroSD-Vibrant
|
||||||
|
--upscale-repeats <int> Run the ESRGAN upscaler this many times (default: 1)
|
||||||
|
--upscale-tile-size <int> tile size for ESRGAN upscaling (default: 128)
|
||||||
|
--cfg-scale <float> unconditional guidance scale: (default: 7.0)
|
||||||
|
--img-cfg-scale <float> image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
|
||||||
|
--guidance <float> distilled guidance scale for models with guidance input (default: 3.5)
|
||||||
|
--slg-scale <float> skip layer guidance (SLG) scale, only for DiT models: (default: 0). 0 means disabled, a value of 2.5 is nice for sd3.5
|
||||||
|
medium
|
||||||
|
--skip-layer-start <float> SLG enabling point (default: 0.01)
|
||||||
|
--skip-layer-end <float> SLG disabling point (default: 0.2)
|
||||||
|
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
|
||||||
|
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
|
||||||
|
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
|
||||||
|
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
|
||||||
|
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
|
||||||
|
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
|
||||||
|
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
|
||||||
|
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
|
||||||
|
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
|
||||||
|
--strength <float> strength for noising/unnoising (default: 0.75)
|
||||||
|
--pm-style-strength <float>
|
||||||
|
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
|
||||||
|
--moe-boundary <float> timestep boundary for Wan2.2 MoE model. (default: 0.875). Only enabled if `--high-noise-steps` is set to -1
|
||||||
|
--vace-strength <float> wan vace strength
|
||||||
|
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
|
||||||
|
--disable-auto-resize-ref-image disable auto resize of ref images
|
||||||
|
-s, --seed RNG seed (default: 42, use random seed for < 0)
|
||||||
|
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
|
||||||
|
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
|
||||||
|
otherwise)
|
||||||
|
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
|
||||||
|
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
|
||||||
|
euler_a otherwise
|
||||||
|
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
|
||||||
|
kl_optimal, lcm, bong_tangent], default: discrete
|
||||||
|
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
|
||||||
|
--skip-layers layers to skip for SLG steps (default: [7,8,9])
|
||||||
|
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])
|
||||||
|
-r, --ref-image reference image for Flux Kontext models (can be used multiple times)
|
||||||
|
--cache-mode caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level), 'spectrum' (UNET/DiT Chebyshev+Taylor forecasting)
|
||||||
|
--cache-option named cache params (key=value format, comma-separated). easycache/ucache:
|
||||||
|
threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples:
|
||||||
|
"threshold=0.25" or "threshold=1.5,reset=0"
|
||||||
|
--scm-mask SCM steps mask for cache-dit: comma-separated 0/1 (e.g., "1,1,1,0,0,1,0,0,1,0") - 1=compute, 0=can cache
|
||||||
|
--scm-policy SCM policy: 'dynamic' (default) or 'static'
|
||||||
|
```
|
||||||
1
examples/server/frontend
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 1a34176cd6d39ad3a226b2b69047e71f6797f6bc
|
||||||
1238
examples/server/main.cpp
Normal file
@ -1,4 +1,4 @@
|
|||||||
for f in *.cpp *.h *.hpp examples/cli/*.cpp examples/cli/*.h; do
|
for f in src/*.cpp src/*.h src/*.hpp src/vocab/*.h src/vocab/*.cpp examples/cli/*.cpp examples/common/*.hpp examples/cli/*.h examples/server/*.cpp; do
|
||||||
[[ "$f" == vocab* ]] && continue
|
[[ "$f" == vocab* ]] && continue
|
||||||
echo "formatting '$f'"
|
echo "formatting '$f'"
|
||||||
# if [ "$f" != "stable-diffusion.h" ]; then
|
# if [ "$f" != "stable-diffusion.h" ]; then
|
||||||
|
|||||||
2
ggml
@ -1 +1 @@
|
|||||||
Subproject commit 2d3876d554551d35c06dccc5852be50d5fd2a275
|
Subproject commit a8db410a252c8c8f2d120c6f2e7133ebe032f35d
|
||||||
@ -36,42 +36,45 @@ enum rng_type_t {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum sample_method_t {
|
enum sample_method_t {
|
||||||
SAMPLE_METHOD_DEFAULT,
|
EULER_SAMPLE_METHOD,
|
||||||
EULER,
|
EULER_A_SAMPLE_METHOD,
|
||||||
HEUN,
|
HEUN_SAMPLE_METHOD,
|
||||||
DPM2,
|
DPM2_SAMPLE_METHOD,
|
||||||
DPMPP2S_A,
|
DPMPP2S_A_SAMPLE_METHOD,
|
||||||
DPMPP2M,
|
DPMPP2M_SAMPLE_METHOD,
|
||||||
DPMPP2Mv2,
|
DPMPP2Mv2_SAMPLE_METHOD,
|
||||||
IPNDM,
|
IPNDM_SAMPLE_METHOD,
|
||||||
IPNDM_V,
|
IPNDM_V_SAMPLE_METHOD,
|
||||||
LCM,
|
LCM_SAMPLE_METHOD,
|
||||||
DDIM_TRAILING,
|
DDIM_TRAILING_SAMPLE_METHOD,
|
||||||
TCD,
|
TCD_SAMPLE_METHOD,
|
||||||
EULER_A,
|
RES_MULTISTEP_SAMPLE_METHOD,
|
||||||
|
RES_2S_SAMPLE_METHOD,
|
||||||
SAMPLE_METHOD_COUNT
|
SAMPLE_METHOD_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
enum scheduler_t {
|
enum scheduler_t {
|
||||||
DEFAULT,
|
DISCRETE_SCHEDULER,
|
||||||
DISCRETE,
|
KARRAS_SCHEDULER,
|
||||||
KARRAS,
|
EXPONENTIAL_SCHEDULER,
|
||||||
EXPONENTIAL,
|
AYS_SCHEDULER,
|
||||||
AYS,
|
GITS_SCHEDULER,
|
||||||
GITS,
|
SGM_UNIFORM_SCHEDULER,
|
||||||
SGM_UNIFORM,
|
SIMPLE_SCHEDULER,
|
||||||
SIMPLE,
|
SMOOTHSTEP_SCHEDULER,
|
||||||
SMOOTHSTEP,
|
KL_OPTIMAL_SCHEDULER,
|
||||||
SCHEDULE_COUNT
|
LCM_SCHEDULER,
|
||||||
|
BONG_TANGENT_SCHEDULER,
|
||||||
|
SCHEDULER_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
enum prediction_t {
|
enum prediction_t {
|
||||||
DEFAULT_PRED,
|
|
||||||
EPS_PRED,
|
EPS_PRED,
|
||||||
V_PRED,
|
V_PRED,
|
||||||
EDM_V_PRED,
|
EDM_V_PRED,
|
||||||
SD3_FLOW_PRED,
|
FLOW_PRED,
|
||||||
FLUX_FLOW_PRED,
|
FLUX_FLOW_PRED,
|
||||||
|
FLUX2_FLOW_PRED,
|
||||||
PREDICTION_COUNT
|
PREDICTION_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -151,21 +154,26 @@ typedef struct {
|
|||||||
float rel_size_y;
|
float rel_size_y;
|
||||||
} sd_tiling_params_t;
|
} sd_tiling_params_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char* name;
|
||||||
|
const char* path;
|
||||||
|
} sd_embedding_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char* model_path;
|
const char* model_path;
|
||||||
const char* clip_l_path;
|
const char* clip_l_path;
|
||||||
const char* clip_g_path;
|
const char* clip_g_path;
|
||||||
const char* clip_vision_path;
|
const char* clip_vision_path;
|
||||||
const char* t5xxl_path;
|
const char* t5xxl_path;
|
||||||
const char* qwen2vl_path;
|
const char* llm_path;
|
||||||
const char* qwen2vl_vision_path;
|
const char* llm_vision_path;
|
||||||
const char* diffusion_model_path;
|
const char* diffusion_model_path;
|
||||||
const char* high_noise_diffusion_model_path;
|
const char* high_noise_diffusion_model_path;
|
||||||
const char* vae_path;
|
const char* vae_path;
|
||||||
const char* taesd_path;
|
const char* taesd_path;
|
||||||
const char* control_net_path;
|
const char* control_net_path;
|
||||||
const char* lora_model_dir;
|
const sd_embedding_t* embeddings;
|
||||||
const char* embedding_dir;
|
uint32_t embedding_count;
|
||||||
const char* photo_maker_path;
|
const char* photo_maker_path;
|
||||||
const char* tensor_type_rules;
|
const char* tensor_type_rules;
|
||||||
bool vae_decode_only;
|
bool vae_decode_only;
|
||||||
@ -177,18 +185,22 @@ typedef struct {
|
|||||||
enum prediction_t prediction;
|
enum prediction_t prediction;
|
||||||
enum lora_apply_mode_t lora_apply_mode;
|
enum lora_apply_mode_t lora_apply_mode;
|
||||||
bool offload_params_to_cpu;
|
bool offload_params_to_cpu;
|
||||||
|
bool enable_mmap;
|
||||||
bool keep_clip_on_cpu;
|
bool keep_clip_on_cpu;
|
||||||
bool keep_control_net_on_cpu;
|
bool keep_control_net_on_cpu;
|
||||||
bool keep_vae_on_cpu;
|
bool keep_vae_on_cpu;
|
||||||
|
bool flash_attn;
|
||||||
bool diffusion_flash_attn;
|
bool diffusion_flash_attn;
|
||||||
bool tae_preview_only;
|
bool tae_preview_only;
|
||||||
bool diffusion_conv_direct;
|
bool diffusion_conv_direct;
|
||||||
bool vae_conv_direct;
|
bool vae_conv_direct;
|
||||||
|
bool circular_x;
|
||||||
|
bool circular_y;
|
||||||
bool force_sdxl_vae_conv_scale;
|
bool force_sdxl_vae_conv_scale;
|
||||||
bool chroma_use_dit_mask;
|
bool chroma_use_dit_mask;
|
||||||
bool chroma_use_t5_mask;
|
bool chroma_use_t5_mask;
|
||||||
int chroma_t5_mask_pad;
|
int chroma_t5_mask_pad;
|
||||||
float flow_shift;
|
bool qwen_image_zero_cond_t;
|
||||||
} sd_ctx_params_t;
|
} sd_ctx_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -220,6 +232,9 @@ typedef struct {
|
|||||||
int sample_steps;
|
int sample_steps;
|
||||||
float eta;
|
float eta;
|
||||||
int shifted_timestep;
|
int shifted_timestep;
|
||||||
|
float* custom_sigmas;
|
||||||
|
int custom_sigmas_count;
|
||||||
|
float flow_shift;
|
||||||
} sd_sample_params_t;
|
} sd_sample_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -229,14 +244,52 @@ typedef struct {
|
|||||||
float style_strength;
|
float style_strength;
|
||||||
} sd_pm_params_t; // photo maker
|
} sd_pm_params_t; // photo maker
|
||||||
|
|
||||||
|
enum sd_cache_mode_t {
|
||||||
|
SD_CACHE_DISABLED = 0,
|
||||||
|
SD_CACHE_EASYCACHE,
|
||||||
|
SD_CACHE_UCACHE,
|
||||||
|
SD_CACHE_DBCACHE,
|
||||||
|
SD_CACHE_TAYLORSEER,
|
||||||
|
SD_CACHE_CACHE_DIT,
|
||||||
|
SD_CACHE_SPECTRUM,
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
bool enabled;
|
enum sd_cache_mode_t mode;
|
||||||
float reuse_threshold;
|
float reuse_threshold;
|
||||||
float start_percent;
|
float start_percent;
|
||||||
float end_percent;
|
float end_percent;
|
||||||
} sd_easycache_params_t;
|
float error_decay_rate;
|
||||||
|
bool use_relative_threshold;
|
||||||
|
bool reset_error_on_compute;
|
||||||
|
int Fn_compute_blocks;
|
||||||
|
int Bn_compute_blocks;
|
||||||
|
float residual_diff_threshold;
|
||||||
|
int max_warmup_steps;
|
||||||
|
int max_cached_steps;
|
||||||
|
int max_continuous_cached_steps;
|
||||||
|
int taylorseer_n_derivatives;
|
||||||
|
int taylorseer_skip_interval;
|
||||||
|
const char* scm_mask;
|
||||||
|
bool scm_policy_dynamic;
|
||||||
|
float spectrum_w;
|
||||||
|
int spectrum_m;
|
||||||
|
float spectrum_lam;
|
||||||
|
int spectrum_window_size;
|
||||||
|
float spectrum_flex_window;
|
||||||
|
int spectrum_warmup_steps;
|
||||||
|
float spectrum_stop_percent;
|
||||||
|
} sd_cache_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
bool is_high_noise;
|
||||||
|
float multiplier;
|
||||||
|
const char* path;
|
||||||
|
} sd_lora_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const sd_lora_t* loras;
|
||||||
|
uint32_t lora_count;
|
||||||
const char* prompt;
|
const char* prompt;
|
||||||
const char* negative_prompt;
|
const char* negative_prompt;
|
||||||
int clip_skip;
|
int clip_skip;
|
||||||
@ -256,10 +309,12 @@ typedef struct {
|
|||||||
float control_strength;
|
float control_strength;
|
||||||
sd_pm_params_t pm_params;
|
sd_pm_params_t pm_params;
|
||||||
sd_tiling_params_t vae_tiling_params;
|
sd_tiling_params_t vae_tiling_params;
|
||||||
sd_easycache_params_t easycache;
|
sd_cache_params_t cache;
|
||||||
} sd_img_gen_params_t;
|
} sd_img_gen_params_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
const sd_lora_t* loras;
|
||||||
|
uint32_t lora_count;
|
||||||
const char* prompt;
|
const char* prompt;
|
||||||
const char* negative_prompt;
|
const char* negative_prompt;
|
||||||
int clip_skip;
|
int clip_skip;
|
||||||
@ -276,19 +331,20 @@ typedef struct {
|
|||||||
int64_t seed;
|
int64_t seed;
|
||||||
int video_frames;
|
int video_frames;
|
||||||
float vace_strength;
|
float vace_strength;
|
||||||
sd_easycache_params_t easycache;
|
sd_tiling_params_t vae_tiling_params;
|
||||||
|
sd_cache_params_t cache;
|
||||||
} sd_vid_gen_params_t;
|
} sd_vid_gen_params_t;
|
||||||
|
|
||||||
typedef struct sd_ctx_t sd_ctx_t;
|
typedef struct sd_ctx_t sd_ctx_t;
|
||||||
|
|
||||||
typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
|
typedef void (*sd_log_cb_t)(enum sd_log_level_t level, const char* text, void* data);
|
||||||
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
|
typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
|
||||||
typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy);
|
typedef void (*sd_preview_cb_t)(int step, int frame_count, sd_image_t* frames, bool is_noisy, void* data);
|
||||||
|
|
||||||
SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
|
SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
|
||||||
SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
|
SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
|
||||||
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy);
|
SD_API void sd_set_preview_callback(sd_preview_cb_t cb, enum preview_t mode, int interval, bool denoised, bool noisy, void* data);
|
||||||
SD_API int32_t get_num_physical_cores();
|
SD_API int32_t sd_get_num_physical_cores();
|
||||||
SD_API const char* sd_get_system_info();
|
SD_API const char* sd_get_system_info();
|
||||||
|
|
||||||
SD_API const char* sd_type_name(enum sd_type_t type);
|
SD_API const char* sd_type_name(enum sd_type_t type);
|
||||||
@ -297,8 +353,8 @@ SD_API const char* sd_rng_type_name(enum rng_type_t rng_type);
|
|||||||
SD_API enum rng_type_t str_to_rng_type(const char* str);
|
SD_API enum rng_type_t str_to_rng_type(const char* str);
|
||||||
SD_API const char* sd_sample_method_name(enum sample_method_t sample_method);
|
SD_API const char* sd_sample_method_name(enum sample_method_t sample_method);
|
||||||
SD_API enum sample_method_t str_to_sample_method(const char* str);
|
SD_API enum sample_method_t str_to_sample_method(const char* str);
|
||||||
SD_API const char* sd_schedule_name(enum scheduler_t scheduler);
|
SD_API const char* sd_scheduler_name(enum scheduler_t scheduler);
|
||||||
SD_API enum scheduler_t str_to_schedule(const char* str);
|
SD_API enum scheduler_t str_to_scheduler(const char* str);
|
||||||
SD_API const char* sd_prediction_name(enum prediction_t prediction);
|
SD_API const char* sd_prediction_name(enum prediction_t prediction);
|
||||||
SD_API enum prediction_t str_to_prediction(const char* str);
|
SD_API enum prediction_t str_to_prediction(const char* str);
|
||||||
SD_API const char* sd_preview_name(enum preview_t preview);
|
SD_API const char* sd_preview_name(enum preview_t preview);
|
||||||
@ -306,18 +362,20 @@ SD_API enum preview_t str_to_preview(const char* str);
|
|||||||
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
|
SD_API const char* sd_lora_apply_mode_name(enum lora_apply_mode_t mode);
|
||||||
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
|
SD_API enum lora_apply_mode_t str_to_lora_apply_mode(const char* str);
|
||||||
|
|
||||||
SD_API void sd_easycache_params_init(sd_easycache_params_t* easycache_params);
|
SD_API void sd_cache_params_init(sd_cache_params_t* cache_params);
|
||||||
|
|
||||||
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
|
SD_API void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params);
|
||||||
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|
SD_API char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params);
|
||||||
|
|
||||||
SD_API sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params);
|
SD_API sd_ctx_t* new_sd_ctx(const sd_ctx_params_t* sd_ctx_params);
|
||||||
SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
|
SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
|
||||||
SD_API enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx);
|
|
||||||
|
|
||||||
SD_API void sd_sample_params_init(sd_sample_params_t* sample_params);
|
SD_API void sd_sample_params_init(sd_sample_params_t* sample_params);
|
||||||
SD_API char* sd_sample_params_to_str(const sd_sample_params_t* sample_params);
|
SD_API char* sd_sample_params_to_str(const sd_sample_params_t* sample_params);
|
||||||
|
|
||||||
|
SD_API enum sample_method_t sd_get_default_sample_method(const sd_ctx_t* sd_ctx);
|
||||||
|
SD_API enum scheduler_t sd_get_default_scheduler(const sd_ctx_t* sd_ctx, enum sample_method_t sample_method);
|
||||||
|
|
||||||
SD_API void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params);
|
SD_API void sd_img_gen_params_init(sd_img_gen_params_t* sd_img_gen_params);
|
||||||
SD_API char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params);
|
SD_API char* sd_img_gen_params_to_str(const sd_img_gen_params_t* sd_img_gen_params);
|
||||||
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params);
|
SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_gen_params);
|
||||||
@ -330,7 +388,8 @@ typedef struct upscaler_ctx_t upscaler_ctx_t;
|
|||||||
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
bool direct,
|
bool direct,
|
||||||
int n_threads);
|
int n_threads,
|
||||||
|
int tile_size);
|
||||||
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
|
SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
|
||||||
|
|
||||||
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,
|
SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx,
|
||||||
@ -343,7 +402,8 @@ SD_API bool convert(const char* input_path,
|
|||||||
const char* vae_path,
|
const char* vae_path,
|
||||||
const char* output_path,
|
const char* output_path,
|
||||||
enum sd_type_t output_type,
|
enum sd_type_t output_type,
|
||||||
const char* tensor_type_rules);
|
const char* tensor_type_rules,
|
||||||
|
bool convert_name);
|
||||||
|
|
||||||
SD_API bool preprocess_canny(sd_image_t image,
|
SD_API bool preprocess_canny(sd_image_t image,
|
||||||
float high_threshold,
|
float high_threshold,
|
||||||
@ -352,6 +412,9 @@ SD_API bool preprocess_canny(sd_image_t image,
|
|||||||
float strong,
|
float strong,
|
||||||
bool inverse);
|
bool inverse);
|
||||||
|
|
||||||
|
SD_API const char* sd_commit(void);
|
||||||
|
SD_API const char* sd_version(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
408
rope.hpp
@ -1,408 +0,0 @@
|
|||||||
#ifndef __ROPE_HPP__
|
|
||||||
#define __ROPE_HPP__
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include "ggml_extend.hpp"
|
|
||||||
|
|
||||||
namespace Rope {
|
|
||||||
// Returns `num` values spaced evenly from `start` to `end`, both end points
// included. When exactly one value is requested the result is just {start}.
template <class T>
__STATIC_INLINE__ std::vector<T> linspace(T start, T end, int num) {
    std::vector<T> values(num);
    if (num == 1) {
        // A single sample has no spacing; this also avoids dividing by (num - 1) == 0.
        values.front() = start;
        return values;
    }
    const T stride = (end - start) / (num - 1);
    int k = 0;
    for (T& value : values) {
        value = start + k * stride;
        ++k;
    }
    return values;
}
|
|
||||||
|
|
||||||
// Returns the transpose of a row-major matrix stored as a vector of rows.
// The column count is taken from the first row, so every row is expected to
// have at least that many elements. An empty matrix yields an empty result
// (the first row is never touched in that case).
__STATIC_INLINE__ std::vector<std::vector<float>> transpose(const std::vector<std::vector<float>>& mat) {
    if (mat.empty()) {
        return {};
    }
    const size_t rows = mat.size();
    const size_t cols = mat[0].size();
    std::vector<std::vector<float>> transposed(cols, std::vector<float>(rows));
    for (size_t i = 0; i < rows; ++i) {
        for (size_t j = 0; j < cols; ++j) {
            transposed[j][i] = mat[i][j];
        }
    }
    return transposed;
}
|
|
||||||
|
|
||||||
// Concatenates all rows of `vec`, in order, into one contiguous vector.
// The total size is computed up front so the output is allocated once
// instead of growing repeatedly while rows are appended.
__STATIC_INLINE__ std::vector<float> flatten(const std::vector<std::vector<float>>& vec) {
    size_t total = 0;
    for (const auto& sub_vec : vec) {
        total += sub_vec.size();
    }

    std::vector<float> flat_vec;
    flat_vec.reserve(total);
    for (const auto& sub_vec : vec) {
        flat_vec.insert(flat_vec.end(), sub_vec.begin(), sub_vec.end());
    }
    return flat_vec;
}
|
|
||||||
|
|
||||||
// Builds one 2x2 rotation block per (position, frequency) pair.
// For each position the row holds dim/2 groups of four values laid out as
// [cos, -sin, sin, cos], where the angle is pos * theta^(-scale) and the
// scale values run evenly from 0 to (dim - 2) / dim. `dim` must be even.
__STATIC_INLINE__ std::vector<std::vector<float>> rope(const std::vector<float>& pos, int dim, int theta) {
    assert(dim % 2 == 0);
    const int n_freq = dim / 2;

    const std::vector<float> exponents = linspace(0.f, (dim * 1.f - 2) / dim, n_freq);

    std::vector<float> inv_freq(n_freq);
    for (int f = 0; f < n_freq; ++f) {
        inv_freq[f] = 1.0 / std::pow(theta, exponents[f]);
    }

    const int n_pos = pos.size();
    std::vector<std::vector<float>> result(n_pos, std::vector<float>(n_freq * 4));
    for (int p = 0; p < n_pos; ++p) {
        std::vector<float>& row = result[p];
        for (int f = 0; f < n_freq; ++f) {
            const float angle = pos[p] * inv_freq[f];
            const float c     = std::cos(angle);
            const float s     = std::sin(angle);
            row[4 * f]     = c;
            row[4 * f + 1] = -s;
            row[4 * f + 2] = s;
            row[4 * f + 3] = c;
        }
    }

    return result;
}
|
|
||||||
|
|
||||||
// Generate IDs for image patches and text
|
|
||||||
// Position ids for text tokens: bs * context_len entries, each the
// three-component id {0, 0, 0}.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_txt_ids(int bs, int context_len) {
    const std::vector<float> zero_id(3, 0.0);
    return std::vector<std::vector<float>>(bs * context_len, zero_id);
}
|
|
||||||
|
|
||||||
// Position ids for the patches of one image, repeated for every batch entry.
// The patch grid has (h + patch_size/2) / patch_size rows and the analogous
// number of columns. Each id is {index, row, col}, with row/col starting at
// h_offset / w_offset and the grid stored row by row.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_img_ids(int h, int w, int patch_size, int bs, int index = 0, int h_offset = 0, int w_offset = 0) {
    const int rows = (h + (patch_size / 2)) / patch_size;
    const int cols = (w + (patch_size / 2)) / patch_size;

    std::vector<std::vector<float>> single(rows * cols, std::vector<float>(3, 0.0));

    const std::vector<float> row_pos = linspace<float>(h_offset, rows - 1 + h_offset, rows);
    const std::vector<float> col_pos = linspace<float>(w_offset, cols - 1 + w_offset, cols);

    // Walk the grid in row-major order; `cell` always equals r * cols + c.
    size_t cell = 0;
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            std::vector<float>& id = single[cell++];
            id[0] = index;
            id[1] = row_pos[r];
            id[2] = col_pos[c];
        }
    }

    // Every batch entry receives an identical copy of the per-image ids.
    const size_t per_image = single.size();
    std::vector<std::vector<float>> img_ids_repeated(bs * per_image, std::vector<float>(3));
    for (int b = 0; b < bs; ++b) {
        const size_t base = b * per_image;
        for (size_t k = 0; k < per_image; ++k) {
            img_ids_repeated[base + k] = single[k];
        }
    }
    return img_ids_repeated;
}
|
|
||||||
|
|
||||||
// Joins two batched id lists per batch entry: for each of the `bs` batches
// the output holds that batch's slice of `a` followed by its slice of `b`.
// Slice lengths are a.size() / bs and b.size() / bs.
__STATIC_INLINE__ std::vector<std::vector<float>> concat_ids(const std::vector<std::vector<float>>& a,
                                                             const std::vector<std::vector<float>>& b,
                                                             int bs) {
    const size_t a_per_batch = a.size() / bs;
    const size_t b_per_batch = b.size() / bs;
    const size_t stride      = a_per_batch + b_per_batch;

    std::vector<std::vector<float>> ids(a.size() + b.size(), std::vector<float>(3));
    for (int batch = 0; batch < bs; ++batch) {
        // `dst` walks the output slots of this batch: first the `a` part, then the `b` part.
        size_t dst = batch * stride;
        for (size_t j = 0; j < a_per_batch; ++j) {
            ids[dst++] = a[batch * a_per_batch + j];
        }
        for (size_t j = 0; j < b_per_batch; ++j) {
            ids[dst++] = b[batch * b_per_batch + j];
        }
    }
    return ids;
}
|
|
||||||
|
|
||||||
// Builds the flattened multi-axis rotary embedding table for `ids`.
// `ids` holds one position per axis for every token. For each axis the
// positions are passed through rope() with that axis' dimension, and the
// per-axis results are placed side by side in each output row. Rows are then
// flattened into a single vector.
// Note: for every batch entry the rows are copied from the first
// ids.size() / bs rope rows, i.e. from the first batch's positions.
__STATIC_INLINE__ std::vector<float> embed_nd(const std::vector<std::vector<float>>& ids,
                                              int bs,
                                              int theta,
                                              const std::vector<int>& axes_dim) {
    const std::vector<std::vector<float>> per_axis_pos = transpose(ids);
    const size_t tokens_per_batch                      = ids.size() / bs;
    const int axis_count                               = axes_dim.size();

    int half_dim_total = 0;
    for (int axis_dim : axes_dim) {
        half_dim_total += axis_dim / 2;
    }

    std::vector<std::vector<float>> table(bs * tokens_per_batch, std::vector<float>(half_dim_total * 2 * 2, 0.0));
    int column = 0;  // first output column of the axis currently being written
    for (int axis = 0; axis < axis_count; ++axis) {
        // [bs * tokens_per_batch, axes_dim[axis] / 2 * 2 * 2]
        const std::vector<std::vector<float>> axis_emb = rope(per_axis_pos[axis], axes_dim[axis], theta);
        const size_t width                             = axis_emb[0].size();
        for (int batch = 0; batch < bs; ++batch) {
            for (size_t tok = 0; tok < tokens_per_batch; ++tok) {
                std::vector<float>& dst = table[batch * tokens_per_batch + tok];
                for (size_t k = 0; k < width; ++k) {
                    dst[column + k] = axis_emb[tok][k];
                }
            }
        }
        column += width;
    }

    return flatten(table);
}
|
|
||||||
|
|
||||||
// Position ids for a list of reference latents, concatenated per batch.
// Each reference is treated as an image of height ne[1] and width ne[0].
// - increase_ref_index == true: references keep zero offsets and get
//   consecutive index values starting at 1.
// - increase_ref_index == false: all references share index 1 and each one is
//   shifted by the accumulated width offset when
//   (height + accumulated height) > (width + accumulated width), otherwise by
//   the accumulated height offset.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_refs_ids(int patch_size,
                                                               int bs,
                                                               const std::vector<ggml_tensor*>& ref_latents,
                                                               bool increase_ref_index) {
    std::vector<std::vector<float>> ids;
    uint64_t used_h = 0;  // largest (height + h_offset) seen so far
    uint64_t used_w = 0;  // largest (width + w_offset) seen so far
    int index       = 1;
    for (ggml_tensor* ref : ref_latents) {
        const bool shift_w = !increase_ref_index && (ref->ne[1] + used_h > ref->ne[0] + used_w);
        const bool shift_h = !increase_ref_index && !shift_w;

        const uint64_t h_offset = shift_h ? used_h : 0;
        const uint64_t w_offset = shift_w ? used_w : 0;

        auto ref_ids = gen_img_ids(ref->ne[1], ref->ne[0], patch_size, bs, index, h_offset, w_offset);
        ids          = concat_ids(ids, ref_ids, bs);

        if (increase_ref_index) {
            index++;
        }

        used_h = std::max(used_h, ref->ne[1] + h_offset);
        used_w = std::max(used_w, ref->ne[0] + w_offset);
    }
    return ids;
}
|
|
||||||
|
|
||||||
// Assembles the per-batch id sequence: text ids first, then image patch ids,
// then (only when reference latents are supplied) the reference ids.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_flux_ids(int h,
                                                               int w,
                                                               int patch_size,
                                                               int bs,
                                                               int context_len,
                                                               const std::vector<ggml_tensor*>& ref_latents,
                                                               bool increase_ref_index) {
    const auto text_part  = gen_txt_ids(bs, context_len);
    const auto image_part = gen_img_ids(h, w, patch_size, bs);

    auto ids = concat_ids(text_part, image_part, bs);
    if (ref_latents.size() == 0) {
        return ids;
    }

    const auto ref_part = gen_refs_ids(patch_size, bs, ref_latents, increase_ref_index);
    ids                 = concat_ids(ids, ref_part, bs);
    return ids;
}
|
|
||||||
|
|
||||||
// Generate flux positional embeddings
|
|
||||||
// Generates the flattened flux positional embedding: builds the
// text + image (+ reference) ids and runs them through embed_nd().
__STATIC_INLINE__ std::vector<float> gen_flux_pe(int h,
                                                 int w,
                                                 int patch_size,
                                                 int bs,
                                                 int context_len,
                                                 const std::vector<ggml_tensor*>& ref_latents,
                                                 bool increase_ref_index,
                                                 int theta,
                                                 const std::vector<int>& axes_dim) {
    const std::vector<std::vector<float>> flux_ids =
        gen_flux_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index);
    return embed_nd(flux_ids, bs, theta, axes_dim);
}
|
|
||||||
|
|
||||||
// Assembles the qwen_image id sequence per batch: text ids, image patch ids
// and, when present, reference ids.
// Text ids use the same value on all three axes and start at
// max(patch rows, patch columns) of the image grid.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_qwen_image_ids(int h,
                                                                     int w,
                                                                     int patch_size,
                                                                     int bs,
                                                                     int context_len,
                                                                     const std::vector<ggml_tensor*>& ref_latents,
                                                                     bool increase_ref_index) {
    const int grid_rows  = (h + (patch_size / 2)) / patch_size;
    const int grid_cols  = (w + (patch_size / 2)) / patch_size;
    const int text_start = std::max(grid_rows, grid_cols);

    // NOTE(review): the end point is context_len + text_start with context_len
    // samples, so linspace spaces the values (context_len / (context_len - 1))
    // apart rather than exactly 1 — confirm this is intended.
    const auto text_pos = linspace<float>(text_start, context_len + text_start, context_len);

    std::vector<std::vector<float>> text_part(bs * context_len, std::vector<float>(3));
    const size_t text_len = text_pos.size();
    for (int batch = 0; batch < bs; ++batch) {
        for (size_t j = 0; j < text_len; ++j) {
            const float p                    = text_pos[j];
            text_part[batch * text_len + j] = {p, p, p};
        }
    }

    const auto image_part = gen_img_ids(h, w, patch_size, bs);
    auto ids              = concat_ids(text_part, image_part, bs);
    if (ref_latents.size() == 0) {
        return ids;
    }

    const auto ref_part = gen_refs_ids(patch_size, bs, ref_latents, increase_ref_index);
    ids                 = concat_ids(ids, ref_part, bs);
    return ids;
}
|
|
||||||
|
|
||||||
// Generate qwen_image positional embeddings
|
|
||||||
// Generates the flattened qwen_image positional embedding: builds the
// qwen_image ids and runs them through embed_nd().
__STATIC_INLINE__ std::vector<float> gen_qwen_image_pe(int h,
                                                       int w,
                                                       int patch_size,
                                                       int bs,
                                                       int context_len,
                                                       const std::vector<ggml_tensor*>& ref_latents,
                                                       bool increase_ref_index,
                                                       int theta,
                                                       const std::vector<int>& axes_dim) {
    const std::vector<std::vector<float>> qwen_ids =
        gen_qwen_image_ids(h, w, patch_size, bs, context_len, ref_latents, increase_ref_index);
    return embed_nd(qwen_ids, bs, theta, axes_dim);
}
|
|
||||||
|
|
||||||
// Position ids for the patches of a video, repeated for every batch entry.
// The grid extent on each axis is (size + patch/2) / patch. Each id is
// {t, h, w} with values starting at the respective offset; entries are stored
// frame by frame, then row by row.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_vid_ids(int t,
                                                              int h,
                                                              int w,
                                                              int pt,
                                                              int ph,
                                                              int pw,
                                                              int bs,
                                                              int t_offset = 0,
                                                              int h_offset = 0,
                                                              int w_offset = 0) {
    const int frames = (t + (pt / 2)) / pt;
    const int rows   = (h + (ph / 2)) / ph;
    const int cols   = (w + (pw / 2)) / pw;

    std::vector<std::vector<float>> single(frames * rows * cols, std::vector<float>(3, 0.0));

    const std::vector<float> t_pos = linspace<float>(t_offset, frames - 1 + t_offset, frames);
    const std::vector<float> h_pos = linspace<float>(h_offset, rows - 1 + h_offset, rows);
    const std::vector<float> w_pos = linspace<float>(w_offset, cols - 1 + w_offset, cols);

    // `cell` always equals f * rows * cols + r * cols + c.
    size_t cell = 0;
    for (int f = 0; f < frames; ++f) {
        for (int r = 0; r < rows; ++r) {
            for (int c = 0; c < cols; ++c) {
                std::vector<float>& id = single[cell++];
                id[0] = t_pos[f];
                id[1] = h_pos[r];
                id[2] = w_pos[c];
            }
        }
    }

    // Every batch entry receives an identical copy of the per-video ids.
    const size_t per_video = single.size();
    std::vector<std::vector<float>> vid_ids_repeated(bs * per_video, std::vector<float>(3));
    for (int b = 0; b < bs; ++b) {
        const size_t base = b * per_video;
        for (size_t k = 0; k < per_video; ++k) {
            vid_ids_repeated[base + k] = single[k];
        }
    }
    return vid_ids_repeated;
}
|
|
||||||
|
|
||||||
// Generate wan positional embeddings
|
|
||||||
// Generates the flattened wan positional embedding: builds video patch ids
// (with zero offsets) and runs them through embed_nd().
__STATIC_INLINE__ std::vector<float> gen_wan_pe(int t,
                                                int h,
                                                int w,
                                                int pt,
                                                int ph,
                                                int pw,
                                                int bs,
                                                int theta,
                                                const std::vector<int>& axes_dim) {
    const std::vector<std::vector<float>> video_ids = gen_vid_ids(t, h, w, pt, ph, pw, bs);
    return embed_nd(video_ids, bs, theta, axes_dim);
}
|
|
||||||
|
|
||||||
// Two-component {row, col} ids for a grid_h x grid_w patch grid.
// The grid is visited in merge_size x merge_size groups. The running visit
// counter is mapped to an output slot through `window_index`: the group number
// selects window_index[group], and the position inside the group is kept.
__STATIC_INLINE__ std::vector<std::vector<float>> gen_qwen2vl_ids(int grid_h,
                                                                  int grid_w,
                                                                  int merge_size,
                                                                  const std::vector<int>& window_index) {
    const int group_area = merge_size * merge_size;

    std::vector<std::vector<float>> ids(grid_h * grid_w, std::vector<float>(2, 0.0));
    int visited = 0;
    for (int top = 0; top < grid_h; top += merge_size) {
        for (int left = 0; left < grid_w; left += merge_size) {
            for (int dy = 0; dy < merge_size; dy++) {
                for (int dx = 0; dx < merge_size; dx++) {
                    const int target_group = window_index[visited / group_area];
                    const int slot         = target_group * group_area + visited % group_area;

                    GGML_ASSERT(slot < grid_h * grid_w);

                    ids[slot][0] = top + dy;
                    ids[slot][1] = left + dx;
                    visited++;
                }
            }
        }
    }
    return ids;
}
|
|
||||||
|
|
||||||
// Generate qwen2vl positional embeddings
|
|
||||||
// Generates the flattened qwen2vl positional embedding: builds the
// window-ordered grid ids and runs them through embed_nd() with batch size 1.
__STATIC_INLINE__ std::vector<float> gen_qwen2vl_pe(int grid_h,
                                                    int grid_w,
                                                    int merge_size,
                                                    const std::vector<int>& window_index,
                                                    int theta,
                                                    const std::vector<int>& axes_dim) {
    const std::vector<std::vector<float>> grid_ids = gen_qwen2vl_ids(grid_h, grid_w, merge_size, window_index);
    return embed_nd(grid_ids, 1, theta, axes_dim);
}
|
|
||||||
|
|
||||||
// Applies the rotation table `pe` to `x` and returns the result reshaped to
// [N*n_head, L, d_head].
//
// x is split into its two pair components (x_0, x_1), each is broadcast to a
// trailing size-2 axis, and the output is x_0 * pe_0 + x_1 * pe_1, where
// pe_0 / pe_1 are the two slices of `pe` along its last axis.
//
// rope_interleaved selects how the pair components are laid out inside d_head:
// true  -> d_head is viewed as (d_head/2, 2);
// false -> d_head is viewed as (2, d_head/2), and the result is permuted back
//          to that layout before the final reshape.
// Shape comments below are written outermost-axis first, as in the original.
__STATIC_INLINE__ struct ggml_tensor* apply_rope(struct ggml_context* ctx,
                                                 struct ggml_tensor* x,
                                                 struct ggml_tensor* pe,
                                                 bool rope_interleaved = true) {
    // x: [N, L, n_head, d_head]
    // pe: [L, d_head/2, 2, 2], [[cos, -sin], [sin, cos]]
    int64_t d_head = x->ne[0];
    int64_t n_head = x->ne[1];
    int64_t L = x->ne[2];
    int64_t N = x->ne[3];
    x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, n_head, L, d_head]
    if (rope_interleaved) {
        x = ggml_reshape_4d(ctx, x, 2, d_head / 2, L, n_head * N); // [N * n_head, L, d_head/2, 2]
        x = ggml_cont(ctx, ggml_permute(ctx, x, 3, 0, 1, 2)); // [2, N * n_head, L, d_head/2]
    } else {
        x = ggml_reshape_4d(ctx, x, d_head / 2, 2, L, n_head * N); // [N * n_head, L, 2, d_head/2]
        x = ggml_cont(ctx, ggml_ext_torch_permute(ctx, x, 0, 2, 3, 1)); // [2, N * n_head, L, d_head/2]
    }

    // `offset` is the step between the two outermost slices of x
    // (presumably in bytes, since it is built from nb[] — confirm against ggml).
    int64_t offset = x->nb[2] * x->ne[2];
    auto x_0 = ggml_view_3d(ctx, x, x->ne[0], x->ne[1], x->ne[2], x->nb[1], x->nb[2], offset * 0); // [N * n_head, L, d_head/2]
    auto x_1 = ggml_view_3d(ctx, x, x->ne[0], x->ne[1], x->ne[2], x->nb[1], x->nb[2], offset * 1); // [N * n_head, L, d_head/2]
    x_0 = ggml_reshape_4d(ctx, x_0, 1, x_0->ne[0], x_0->ne[1], x_0->ne[2]); // [N * n_head, L, d_head/2, 1]
    x_1 = ggml_reshape_4d(ctx, x_1, 1, x_1->ne[0], x_1->ne[1], x_1->ne[2]); // [N * n_head, L, d_head/2, 1]
    // temp_x only provides the target shape for the two repeats below.
    auto temp_x = ggml_new_tensor_4d(ctx, x_0->type, 2, x_0->ne[1], x_0->ne[2], x_0->ne[3]);
    x_0 = ggml_repeat(ctx, x_0, temp_x); // [N * n_head, L, d_head/2, 2]
    x_1 = ggml_repeat(ctx, x_1, temp_x); // [N * n_head, L, d_head/2, 2]

    pe = ggml_cont(ctx, ggml_permute(ctx, pe, 3, 0, 1, 2)); // [2, L, d_head/2, 2]
    offset = pe->nb[2] * pe->ne[2];
    auto pe_0 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 0); // [L, d_head/2, 2]
    auto pe_1 = ggml_view_3d(ctx, pe, pe->ne[0], pe->ne[1], pe->ne[2], pe->nb[1], pe->nb[2], offset * 1); // [L, d_head/2, 2]

    auto x_out = ggml_add_inplace(ctx, ggml_mul(ctx, x_0, pe_0), ggml_mul(ctx, x_1, pe_1)); // [N * n_head, L, d_head/2, 2]
    if (!rope_interleaved) {
        // NOTE(review): the "x" in the shape below is presumably the size-2 axis,
        // i.e. [N * n_head, L, 2, d_head/2] as in the non-interleaved reshape above — confirm.
        x_out = ggml_cont(ctx, ggml_permute(ctx, x_out, 1, 0, 2, 3)); // [N * n_head, L, x, d_head/2]
    }
    x_out = ggml_reshape_3d(ctx, x_out, d_head, L, n_head * N); // [N*n_head, L, d_head]
    return x_out;
}
|
|
||||||
|
|
||||||
// Attention with rotary position embedding applied to q and k.
//   q, k, v: [N, L, n_head, d_head]
//   pe:      [L, d_head/2, 2, 2]
//   returns: [N, L, n_head*d_head]
// q and k are rotated with apply_rope() (q first, then k); v is passed through
// unchanged, and v->ne[1] is handed to the attention helper.
__STATIC_INLINE__ struct ggml_tensor* attention(GGMLRunnerContext* ctx,
                                                struct ggml_tensor* q,
                                                struct ggml_tensor* k,
                                                struct ggml_tensor* v,
                                                struct ggml_tensor* pe,
                                                struct ggml_tensor* mask,
                                                float kv_scale = 1.0f,
                                                bool rope_interleaved = true) {
    auto q_rot = apply_rope(ctx->ggml_ctx, q, pe, rope_interleaved);  // [N*n_head, L, d_head]
    auto k_rot = apply_rope(ctx->ggml_ctx, k, pe, rope_interleaved);  // [N*n_head, L, d_head]

    // [N, L, n_head*d_head]
    return ggml_ext_attention_ext(ctx->ggml_ctx,
                                  ctx->backend,
                                  q_rot,
                                  k_rot,
                                  v,
                                  v->ne[1],
                                  mask,
                                  false,
                                  true,
                                  ctx->flash_attn_enabled,
                                  kv_scale);
}
|
|
||||||
}; // namespace Rope
|
|
||||||
|
|
||||||
#endif // __ROPE_HPP__
|
|
||||||
686
src/anima.hpp
Normal file
@ -0,0 +1,686 @@
|
|||||||
|
#ifndef __ANIMA_HPP__
|
||||||
|
#define __ANIMA_HPP__
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common_block.hpp"
|
||||||
|
#include "flux.hpp"
|
||||||
|
#include "rope.hpp"
|
||||||
|
|
||||||
|
namespace Anima {
|
||||||
|
// Size argument handed to new_graph_custom() when AnimaRunner::build_graph creates its compute graph.
constexpr int ANIMA_GRAPH_SIZE = 65536;
|
||||||
|
|
||||||
|
// Multiplies `x` by `gate` after viewing the 2-D gate as
// (gate->ne[0], 1, gate->ne[1]) so a singleton axis sits in the middle.
__STATIC_INLINE__ ggml_tensor* apply_gate(ggml_context* ctx,
                                          ggml_tensor* x,
                                          ggml_tensor* gate) {
    const int64_t channels = gate->ne[0];
    const int64_t batch    = gate->ne[1];

    ggml_tensor* gate_3d = ggml_reshape_3d(ctx, gate, channels, 1, batch);  // [N, 1, C]
    return ggml_mul(ctx, x, gate_3d);
}
|
||||||
|
|
||||||
|
struct XEmbedder : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
XEmbedder(int64_t in_dim, int64_t out_dim) {
|
||||||
|
blocks["proj.1"] = std::make_shared<Linear>(in_dim, out_dim, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
|
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj.1"]);
|
||||||
|
return proj->forward(ctx, x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TimestepEmbedder : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
TimestepEmbedder(int64_t in_dim, int64_t out_dim) {
|
||||||
|
blocks["1.linear_1"] = std::make_shared<Linear>(in_dim, in_dim, false);
|
||||||
|
blocks["1.linear_2"] = std::make_shared<Linear>(in_dim, out_dim, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
|
auto linear_1 = std::dynamic_pointer_cast<Linear>(blocks["1.linear_1"]);
|
||||||
|
auto linear_2 = std::dynamic_pointer_cast<Linear>(blocks["1.linear_2"]);
|
||||||
|
|
||||||
|
x = linear_1->forward(ctx, x);
|
||||||
|
x = ggml_silu_inplace(ctx->ggml_ctx, x);
|
||||||
|
x = linear_2->forward(ctx, x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AdaLayerNormZero : public GGMLBlock {
|
||||||
|
protected:
|
||||||
|
int64_t in_features;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AdaLayerNormZero(int64_t in_features, int64_t hidden_features = 256)
|
||||||
|
: in_features(in_features) {
|
||||||
|
blocks["norm"] = std::make_shared<LayerNorm>(in_features, 1e-6f, false, false);
|
||||||
|
blocks["1"] = std::make_shared<Linear>(in_features, hidden_features, false);
|
||||||
|
blocks["2"] = std::make_shared<Linear>(hidden_features, 3 * in_features, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<ggml_tensor*, ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* hidden_states,
|
||||||
|
ggml_tensor* embedded_timestep,
|
||||||
|
ggml_tensor* temb = nullptr) {
|
||||||
|
auto norm = std::dynamic_pointer_cast<LayerNorm>(blocks["norm"]);
|
||||||
|
auto linear_1 = std::dynamic_pointer_cast<Linear>(blocks["1"]);
|
||||||
|
auto linear_2 = std::dynamic_pointer_cast<Linear>(blocks["2"]);
|
||||||
|
|
||||||
|
auto emb = ggml_silu(ctx->ggml_ctx, embedded_timestep);
|
||||||
|
emb = linear_1->forward(ctx, emb);
|
||||||
|
emb = linear_2->forward(ctx, emb); // [N, 3*C]
|
||||||
|
|
||||||
|
if (temb != nullptr) {
|
||||||
|
emb = ggml_add(ctx->ggml_ctx, emb, temb);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto emb_chunks = ggml_ext_chunk(ctx->ggml_ctx, emb, 3, 0);
|
||||||
|
auto shift = emb_chunks[0];
|
||||||
|
auto scale = emb_chunks[1];
|
||||||
|
auto gate = emb_chunks[2];
|
||||||
|
|
||||||
|
auto x = norm->forward(ctx, hidden_states);
|
||||||
|
x = Flux::modulate(ctx->ggml_ctx, x, shift, scale);
|
||||||
|
|
||||||
|
return {x, gate};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AdaLayerNorm : public GGMLBlock {
|
||||||
|
protected:
|
||||||
|
int64_t embedding_dim;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AdaLayerNorm(int64_t in_features, int64_t hidden_features = 256)
|
||||||
|
: embedding_dim(in_features) {
|
||||||
|
blocks["norm"] = std::make_shared<LayerNorm>(in_features, 1e-6f, false, false);
|
||||||
|
blocks["1"] = std::make_shared<Linear>(in_features, hidden_features, false);
|
||||||
|
blocks["2"] = std::make_shared<Linear>(hidden_features, 2 * in_features, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* hidden_states,
|
||||||
|
ggml_tensor* embedded_timestep,
|
||||||
|
ggml_tensor* temb = nullptr) {
|
||||||
|
auto norm = std::dynamic_pointer_cast<LayerNorm>(blocks["norm"]);
|
||||||
|
auto linear_1 = std::dynamic_pointer_cast<Linear>(blocks["1"]);
|
||||||
|
auto linear_2 = std::dynamic_pointer_cast<Linear>(blocks["2"]);
|
||||||
|
|
||||||
|
auto emb = ggml_silu(ctx->ggml_ctx, embedded_timestep);
|
||||||
|
emb = linear_1->forward(ctx, emb);
|
||||||
|
emb = linear_2->forward(ctx, emb); // [N, 2*C]
|
||||||
|
|
||||||
|
if (temb != nullptr) {
|
||||||
|
auto temb_2c = ggml_view_2d(ctx->ggml_ctx, temb, 2 * embedding_dim, temb->ne[1], temb->nb[1], 0);
|
||||||
|
emb = ggml_add(ctx->ggml_ctx, emb, temb_2c);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto emb_chunks = ggml_ext_chunk(ctx->ggml_ctx, emb, 2, 0);
|
||||||
|
auto shift = emb_chunks[0];
|
||||||
|
auto scale = emb_chunks[1];
|
||||||
|
|
||||||
|
auto x = norm->forward(ctx, hidden_states);
|
||||||
|
x = Flux::modulate(ctx->ggml_ctx, x, shift, scale);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AnimaAttention : public GGMLBlock {
|
||||||
|
protected:
|
||||||
|
int64_t num_heads;
|
||||||
|
int64_t head_dim;
|
||||||
|
std::string out_proj_name;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AnimaAttention(int64_t query_dim,
|
||||||
|
int64_t context_dim,
|
||||||
|
int64_t num_heads,
|
||||||
|
int64_t head_dim,
|
||||||
|
const std::string& out_proj_name = "output_proj")
|
||||||
|
: num_heads(num_heads), head_dim(head_dim), out_proj_name(out_proj_name) {
|
||||||
|
int64_t inner_dim = num_heads * head_dim;
|
||||||
|
|
||||||
|
blocks["q_proj"] = std::make_shared<Linear>(query_dim, inner_dim, false);
|
||||||
|
blocks["k_proj"] = std::make_shared<Linear>(context_dim, inner_dim, false);
|
||||||
|
blocks["v_proj"] = std::make_shared<Linear>(context_dim, inner_dim, false);
|
||||||
|
blocks["q_norm"] = std::make_shared<RMSNorm>(head_dim, 1e-6f);
|
||||||
|
blocks["k_norm"] = std::make_shared<RMSNorm>(head_dim, 1e-6f);
|
||||||
|
blocks[this->out_proj_name] = std::make_shared<Linear>(inner_dim, query_dim, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* hidden_states,
|
||||||
|
ggml_tensor* encoder_hidden_states = nullptr,
|
||||||
|
ggml_tensor* pe_q = nullptr,
|
||||||
|
ggml_tensor* pe_k = nullptr) {
|
||||||
|
if (encoder_hidden_states == nullptr) {
|
||||||
|
encoder_hidden_states = hidden_states;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto q_proj = std::dynamic_pointer_cast<Linear>(blocks["q_proj"]);
|
||||||
|
auto k_proj = std::dynamic_pointer_cast<Linear>(blocks["k_proj"]);
|
||||||
|
auto v_proj = std::dynamic_pointer_cast<Linear>(blocks["v_proj"]);
|
||||||
|
auto q_norm = std::dynamic_pointer_cast<RMSNorm>(blocks["q_norm"]);
|
||||||
|
auto k_norm = std::dynamic_pointer_cast<RMSNorm>(blocks["k_norm"]);
|
||||||
|
auto out_proj = std::dynamic_pointer_cast<Linear>(blocks[out_proj_name]);
|
||||||
|
|
||||||
|
auto q = q_proj->forward(ctx, hidden_states);
|
||||||
|
auto k = k_proj->forward(ctx, encoder_hidden_states);
|
||||||
|
auto v = v_proj->forward(ctx, encoder_hidden_states);
|
||||||
|
|
||||||
|
int64_t N = q->ne[2];
|
||||||
|
int64_t L_q = q->ne[1];
|
||||||
|
int64_t L_k = k->ne[1];
|
||||||
|
|
||||||
|
auto q4 = ggml_reshape_4d(ctx->ggml_ctx, q, head_dim, num_heads, L_q, N); // [N, L_q, H, D]
|
||||||
|
auto k4 = ggml_reshape_4d(ctx->ggml_ctx, k, head_dim, num_heads, L_k, N); // [N, L_k, H, D]
|
||||||
|
auto v4 = ggml_reshape_4d(ctx->ggml_ctx, v, head_dim, num_heads, L_k, N); // [N, L_k, H, D]
|
||||||
|
|
||||||
|
q4 = q_norm->forward(ctx, q4);
|
||||||
|
k4 = k_norm->forward(ctx, k4);
|
||||||
|
|
||||||
|
ggml_tensor* attn_out = nullptr;
|
||||||
|
if (pe_q != nullptr || pe_k != nullptr) {
|
||||||
|
if (pe_q == nullptr) {
|
||||||
|
pe_q = pe_k;
|
||||||
|
}
|
||||||
|
if (pe_k == nullptr) {
|
||||||
|
pe_k = pe_q;
|
||||||
|
}
|
||||||
|
auto q_rope = Rope::apply_rope(ctx->ggml_ctx, q4, pe_q, false);
|
||||||
|
auto k_rope = Rope::apply_rope(ctx->ggml_ctx, k4, pe_k, false);
|
||||||
|
attn_out = ggml_ext_attention_ext(ctx->ggml_ctx,
|
||||||
|
ctx->backend,
|
||||||
|
q_rope,
|
||||||
|
k_rope,
|
||||||
|
v4,
|
||||||
|
num_heads,
|
||||||
|
nullptr,
|
||||||
|
true,
|
||||||
|
ctx->flash_attn_enabled);
|
||||||
|
} else {
|
||||||
|
auto q_flat = ggml_reshape_3d(ctx->ggml_ctx, q4, head_dim * num_heads, L_q, N);
|
||||||
|
auto k_flat = ggml_reshape_3d(ctx->ggml_ctx, k4, head_dim * num_heads, L_k, N);
|
||||||
|
attn_out = ggml_ext_attention_ext(ctx->ggml_ctx,
|
||||||
|
ctx->backend,
|
||||||
|
q_flat,
|
||||||
|
k_flat,
|
||||||
|
v,
|
||||||
|
num_heads,
|
||||||
|
nullptr,
|
||||||
|
false,
|
||||||
|
ctx->flash_attn_enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
return out_proj->forward(ctx, attn_out);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AnimaMLP : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
AnimaMLP(int64_t dim, int64_t hidden_dim) {
|
||||||
|
blocks["layer1"] = std::make_shared<Linear>(dim, hidden_dim, false);
|
||||||
|
blocks["layer2"] = std::make_shared<Linear>(hidden_dim, dim, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
|
auto layer1 = std::dynamic_pointer_cast<Linear>(blocks["layer1"]);
|
||||||
|
auto layer2 = std::dynamic_pointer_cast<Linear>(blocks["layer2"]);
|
||||||
|
|
||||||
|
x = layer1->forward(ctx, x);
|
||||||
|
x = ggml_ext_gelu(ctx->ggml_ctx, x, true);
|
||||||
|
x = layer2->forward(ctx, x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AdapterMLP : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
AdapterMLP(int64_t dim, int64_t hidden_dim) {
|
||||||
|
blocks["0"] = std::make_shared<Linear>(dim, hidden_dim, true);
|
||||||
|
blocks["2"] = std::make_shared<Linear>(hidden_dim, dim, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
|
auto layer0 = std::dynamic_pointer_cast<Linear>(blocks["0"]);
|
||||||
|
auto layer2 = std::dynamic_pointer_cast<Linear>(blocks["2"]);
|
||||||
|
|
||||||
|
x = layer0->forward(ctx, x);
|
||||||
|
x = ggml_ext_gelu(ctx->ggml_ctx, x, true);
|
||||||
|
x = layer2->forward(ctx, x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct LLMAdapterBlock : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
LLMAdapterBlock(int64_t model_dim = 1024, int64_t source_dim = 1024, int64_t num_heads = 16, int64_t head_dim = 64) {
|
||||||
|
blocks["norm_self_attn"] = std::make_shared<RMSNorm>(model_dim, 1e-6f);
|
||||||
|
blocks["self_attn"] = std::make_shared<AnimaAttention>(model_dim, model_dim, num_heads, head_dim, "o_proj");
|
||||||
|
blocks["norm_cross_attn"] = std::make_shared<RMSNorm>(model_dim, 1e-6f);
|
||||||
|
blocks["cross_attn"] = std::make_shared<AnimaAttention>(model_dim, source_dim, num_heads, head_dim, "o_proj");
|
||||||
|
blocks["norm_mlp"] = std::make_shared<RMSNorm>(model_dim, 1e-6f);
|
||||||
|
blocks["mlp"] = std::make_shared<AdapterMLP>(model_dim, model_dim * 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* x,
|
||||||
|
ggml_tensor* context,
|
||||||
|
ggml_tensor* target_pe,
|
||||||
|
ggml_tensor* context_pe) {
|
||||||
|
auto norm_self_attn = std::dynamic_pointer_cast<RMSNorm>(blocks["norm_self_attn"]);
|
||||||
|
auto self_attn = std::dynamic_pointer_cast<AnimaAttention>(blocks["self_attn"]);
|
||||||
|
auto norm_cross_attn = std::dynamic_pointer_cast<RMSNorm>(blocks["norm_cross_attn"]);
|
||||||
|
auto cross_attn = std::dynamic_pointer_cast<AnimaAttention>(blocks["cross_attn"]);
|
||||||
|
auto norm_mlp = std::dynamic_pointer_cast<RMSNorm>(blocks["norm_mlp"]);
|
||||||
|
auto mlp = std::dynamic_pointer_cast<AdapterMLP>(blocks["mlp"]);
|
||||||
|
|
||||||
|
auto h = norm_self_attn->forward(ctx, x);
|
||||||
|
h = self_attn->forward(ctx, h, nullptr, target_pe, target_pe);
|
||||||
|
x = ggml_add(ctx->ggml_ctx, x, h);
|
||||||
|
|
||||||
|
h = norm_cross_attn->forward(ctx, x);
|
||||||
|
h = cross_attn->forward(ctx, h, context, target_pe, context_pe);
|
||||||
|
x = ggml_add(ctx->ggml_ctx, x, h);
|
||||||
|
|
||||||
|
h = norm_mlp->forward(ctx, x);
|
||||||
|
h = mlp->forward(ctx, h);
|
||||||
|
x = ggml_add(ctx->ggml_ctx, x, h);
|
||||||
|
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct LLMAdapter : public GGMLBlock {
|
||||||
|
protected:
|
||||||
|
int num_layers;
|
||||||
|
|
||||||
|
public:
|
||||||
|
LLMAdapter(int64_t source_dim = 1024,
|
||||||
|
int64_t target_dim = 1024,
|
||||||
|
int64_t model_dim = 1024,
|
||||||
|
int num_layers = 6,
|
||||||
|
int num_heads = 16)
|
||||||
|
: num_layers(num_layers) {
|
||||||
|
int64_t head_dim = model_dim / num_heads;
|
||||||
|
|
||||||
|
blocks["embed"] = std::make_shared<Embedding>(32128, target_dim);
|
||||||
|
for (int i = 0; i < num_layers; i++) {
|
||||||
|
blocks["blocks." + std::to_string(i)] =
|
||||||
|
std::make_shared<LLMAdapterBlock>(model_dim, source_dim, num_heads, head_dim);
|
||||||
|
}
|
||||||
|
blocks["out_proj"] = std::make_shared<Linear>(model_dim, target_dim, true);
|
||||||
|
blocks["norm"] = std::make_shared<RMSNorm>(target_dim, 1e-6f);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* source_hidden_states,
|
||||||
|
ggml_tensor* target_input_ids,
|
||||||
|
ggml_tensor* target_pe,
|
||||||
|
ggml_tensor* source_pe) {
|
||||||
|
GGML_ASSERT(target_input_ids != nullptr);
|
||||||
|
if (ggml_n_dims(target_input_ids) == 1) {
|
||||||
|
target_input_ids = ggml_reshape_2d(ctx->ggml_ctx, target_input_ids, target_input_ids->ne[0], 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto embed = std::dynamic_pointer_cast<Embedding>(blocks["embed"]);
|
||||||
|
auto out_proj = std::dynamic_pointer_cast<Linear>(blocks["out_proj"]);
|
||||||
|
auto norm = std::dynamic_pointer_cast<RMSNorm>(blocks["norm"]);
|
||||||
|
|
||||||
|
auto x = embed->forward(ctx, target_input_ids); // [N, target_len, target_dim]
|
||||||
|
|
||||||
|
for (int i = 0; i < num_layers; i++) {
|
||||||
|
auto block = std::dynamic_pointer_cast<LLMAdapterBlock>(blocks["blocks." + std::to_string(i)]);
|
||||||
|
x = block->forward(ctx, x, source_hidden_states, target_pe, source_pe);
|
||||||
|
}
|
||||||
|
|
||||||
|
x = out_proj->forward(ctx, x);
|
||||||
|
x = norm->forward(ctx, x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TransformerBlock : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
TransformerBlock(int64_t hidden_size,
|
||||||
|
int64_t text_embed_dim,
|
||||||
|
int64_t num_heads,
|
||||||
|
int64_t head_dim,
|
||||||
|
int64_t mlp_ratio = 4,
|
||||||
|
int64_t adaln_lora_dim = 256) {
|
||||||
|
blocks["adaln_modulation_self_attn"] = std::make_shared<AdaLayerNormZero>(hidden_size, adaln_lora_dim);
|
||||||
|
blocks["self_attn"] = std::make_shared<AnimaAttention>(hidden_size, hidden_size, num_heads, head_dim);
|
||||||
|
blocks["adaln_modulation_cross_attn"] = std::make_shared<AdaLayerNormZero>(hidden_size, adaln_lora_dim);
|
||||||
|
blocks["cross_attn"] = std::make_shared<AnimaAttention>(hidden_size, text_embed_dim, num_heads, head_dim);
|
||||||
|
blocks["adaln_modulation_mlp"] = std::make_shared<AdaLayerNormZero>(hidden_size, adaln_lora_dim);
|
||||||
|
blocks["mlp"] = std::make_shared<AnimaMLP>(hidden_size, hidden_size * mlp_ratio);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* hidden_states,
|
||||||
|
ggml_tensor* encoder_hidden_states,
|
||||||
|
ggml_tensor* embedded_timestep,
|
||||||
|
ggml_tensor* temb,
|
||||||
|
ggml_tensor* image_pe) {
|
||||||
|
auto norm1 = std::dynamic_pointer_cast<AdaLayerNormZero>(blocks["adaln_modulation_self_attn"]);
|
||||||
|
auto attn1 = std::dynamic_pointer_cast<AnimaAttention>(blocks["self_attn"]);
|
||||||
|
auto norm2 = std::dynamic_pointer_cast<AdaLayerNormZero>(blocks["adaln_modulation_cross_attn"]);
|
||||||
|
auto attn2 = std::dynamic_pointer_cast<AnimaAttention>(blocks["cross_attn"]);
|
||||||
|
auto norm3 = std::dynamic_pointer_cast<AdaLayerNormZero>(blocks["adaln_modulation_mlp"]);
|
||||||
|
auto mlp = std::dynamic_pointer_cast<AnimaMLP>(blocks["mlp"]);
|
||||||
|
|
||||||
|
auto [normed1, gate1] = norm1->forward(ctx, hidden_states, embedded_timestep, temb);
|
||||||
|
auto h = attn1->forward(ctx, normed1, nullptr, image_pe, image_pe);
|
||||||
|
hidden_states = ggml_add(ctx->ggml_ctx, hidden_states, apply_gate(ctx->ggml_ctx, h, gate1));
|
||||||
|
|
||||||
|
auto [normed2, gate2] = norm2->forward(ctx, hidden_states, embedded_timestep, temb);
|
||||||
|
h = attn2->forward(ctx, normed2, encoder_hidden_states, nullptr, nullptr);
|
||||||
|
hidden_states = ggml_add(ctx->ggml_ctx, hidden_states, apply_gate(ctx->ggml_ctx, h, gate2));
|
||||||
|
|
||||||
|
auto [normed3, gate3] = norm3->forward(ctx, hidden_states, embedded_timestep, temb);
|
||||||
|
h = mlp->forward(ctx, normed3);
|
||||||
|
hidden_states = ggml_add(ctx->ggml_ctx, hidden_states, apply_gate(ctx->ggml_ctx, h, gate3));
|
||||||
|
|
||||||
|
return hidden_states;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FinalLayer : public GGMLBlock {
|
||||||
|
protected:
|
||||||
|
int64_t hidden_size;
|
||||||
|
int64_t patch_size;
|
||||||
|
int64_t out_channels;
|
||||||
|
|
||||||
|
public:
|
||||||
|
FinalLayer(int64_t hidden_size, int64_t patch_size, int64_t out_channels)
|
||||||
|
: hidden_size(hidden_size), patch_size(patch_size), out_channels(out_channels) {
|
||||||
|
blocks["adaln_modulation"] = std::make_shared<AdaLayerNorm>(hidden_size, 256);
|
||||||
|
blocks["linear"] = std::make_shared<Linear>(hidden_size, patch_size * patch_size * out_channels, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* hidden_states,
|
||||||
|
ggml_tensor* embedded_timestep,
|
||||||
|
ggml_tensor* temb) {
|
||||||
|
auto adaln = std::dynamic_pointer_cast<AdaLayerNorm>(blocks["adaln_modulation"]);
|
||||||
|
auto linear = std::dynamic_pointer_cast<Linear>(blocks["linear"]);
|
||||||
|
|
||||||
|
hidden_states = adaln->forward(ctx, hidden_states, embedded_timestep, temb);
|
||||||
|
hidden_states = linear->forward(ctx, hidden_states);
|
||||||
|
return hidden_states;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AnimaNet : public GGMLBlock {
|
||||||
|
public:
|
||||||
|
int64_t in_channels = 16;
|
||||||
|
int64_t out_channels = 16;
|
||||||
|
int64_t hidden_size = 2048;
|
||||||
|
int64_t text_embed_dim = 1024;
|
||||||
|
int64_t num_heads = 16;
|
||||||
|
int64_t head_dim = 128;
|
||||||
|
int patch_size = 2;
|
||||||
|
int64_t num_layers = 28;
|
||||||
|
std::vector<int> axes_dim = {44, 42, 42};
|
||||||
|
int theta = 10000;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AnimaNet() = default;
|
||||||
|
explicit AnimaNet(int64_t num_layers)
|
||||||
|
: num_layers(num_layers) {
|
||||||
|
blocks["x_embedder"] = std::make_shared<XEmbedder>((in_channels + 1) * patch_size * patch_size, hidden_size);
|
||||||
|
blocks["t_embedder"] = std::make_shared<TimestepEmbedder>(hidden_size, hidden_size * 3);
|
||||||
|
blocks["t_embedding_norm"] = std::make_shared<RMSNorm>(hidden_size, 1e-6f);
|
||||||
|
for (int i = 0; i < num_layers; i++) {
|
||||||
|
blocks["blocks." + std::to_string(i)] = std::make_shared<TransformerBlock>(hidden_size,
|
||||||
|
text_embed_dim,
|
||||||
|
num_heads,
|
||||||
|
head_dim);
|
||||||
|
}
|
||||||
|
blocks["final_layer"] = std::make_shared<FinalLayer>(hidden_size, patch_size, out_channels);
|
||||||
|
blocks["llm_adapter"] = std::make_shared<LLMAdapter>(1024, 1024, 1024, 6, 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
|
ggml_tensor* x,
|
||||||
|
ggml_tensor* timestep,
|
||||||
|
ggml_tensor* encoder_hidden_states,
|
||||||
|
ggml_tensor* image_pe,
|
||||||
|
ggml_tensor* t5_ids = nullptr,
|
||||||
|
ggml_tensor* t5_weights = nullptr,
|
||||||
|
ggml_tensor* adapter_q_pe = nullptr,
|
||||||
|
ggml_tensor* adapter_k_pe = nullptr) {
|
||||||
|
GGML_ASSERT(x->ne[3] == 1);
|
||||||
|
|
||||||
|
auto x_embedder = std::dynamic_pointer_cast<XEmbedder>(blocks["x_embedder"]);
|
||||||
|
auto t_embedder = std::dynamic_pointer_cast<TimestepEmbedder>(blocks["t_embedder"]);
|
||||||
|
auto t_embedding_norm = std::dynamic_pointer_cast<RMSNorm>(blocks["t_embedding_norm"]);
|
||||||
|
auto final_layer = std::dynamic_pointer_cast<FinalLayer>(blocks["final_layer"]);
|
||||||
|
auto llm_adapter = std::dynamic_pointer_cast<LLMAdapter>(blocks["llm_adapter"]);
|
||||||
|
|
||||||
|
int64_t W = x->ne[0];
|
||||||
|
int64_t H = x->ne[1];
|
||||||
|
|
||||||
|
auto padding_mask = ggml_ext_zeros(ctx->ggml_ctx, x->ne[0], x->ne[1], 1, x->ne[3]);
|
||||||
|
x = ggml_concat(ctx->ggml_ctx, x, padding_mask, 2); // [N, C + 1, H, W]
|
||||||
|
|
||||||
|
x = DiT::pad_and_patchify(ctx, x, patch_size, patch_size); // [N, h*w, (C+1)*ph*pw]
|
||||||
|
|
||||||
|
x = x_embedder->forward(ctx, x);
|
||||||
|
|
||||||
|
auto timestep_proj = ggml_ext_timestep_embedding(ctx->ggml_ctx, timestep, static_cast<int>(hidden_size));
|
||||||
|
auto temb = t_embedder->forward(ctx, timestep_proj);
|
||||||
|
auto embedded_timestep = t_embedding_norm->forward(ctx, timestep_proj);
|
||||||
|
|
||||||
|
if (t5_ids != nullptr) {
|
||||||
|
auto adapted_context = llm_adapter->forward(ctx, encoder_hidden_states, t5_ids, adapter_q_pe, adapter_k_pe);
|
||||||
|
if (t5_weights != nullptr) {
|
||||||
|
auto w = t5_weights;
|
||||||
|
if (ggml_n_dims(w) == 1) {
|
||||||
|
w = ggml_reshape_3d(ctx->ggml_ctx, w, 1, w->ne[0], 1);
|
||||||
|
}
|
||||||
|
w = ggml_repeat_4d(ctx->ggml_ctx, w, adapted_context->ne[0], adapted_context->ne[1], adapted_context->ne[2], 1);
|
||||||
|
adapted_context = ggml_mul(ctx->ggml_ctx, adapted_context, w);
|
||||||
|
}
|
||||||
|
if (adapted_context->ne[1] < 512) {
|
||||||
|
auto pad_ctx = ggml_ext_zeros(ctx->ggml_ctx,
|
||||||
|
adapted_context->ne[0],
|
||||||
|
512 - adapted_context->ne[1],
|
||||||
|
adapted_context->ne[2],
|
||||||
|
1);
|
||||||
|
adapted_context = ggml_concat(ctx->ggml_ctx, adapted_context, pad_ctx, 1);
|
||||||
|
} else if (adapted_context->ne[1] > 512) {
|
||||||
|
adapted_context = ggml_ext_slice(ctx->ggml_ctx, adapted_context, 1, 0, 512);
|
||||||
|
}
|
||||||
|
encoder_hidden_states = adapted_context;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < num_layers; i++) {
|
||||||
|
auto block = std::dynamic_pointer_cast<TransformerBlock>(blocks["blocks." + std::to_string(i)]);
|
||||||
|
x = block->forward(ctx, x, encoder_hidden_states, embedded_timestep, temb, image_pe);
|
||||||
|
}
|
||||||
|
|
||||||
|
x = final_layer->forward(ctx, x, embedded_timestep, temb); // [N, h*w, ph*pw*C]
|
||||||
|
|
||||||
|
x = DiT::unpatchify_and_crop(ctx->ggml_ctx, x, H, W, patch_size, patch_size, false); // [N, C, H, W]
|
||||||
|
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AnimaRunner : public GGMLRunner {
|
||||||
|
public:
|
||||||
|
std::vector<float> image_pe_vec;
|
||||||
|
std::vector<float> adapter_q_pe_vec;
|
||||||
|
std::vector<float> adapter_k_pe_vec;
|
||||||
|
AnimaNet net;
|
||||||
|
|
||||||
|
AnimaRunner(ggml_backend_t backend,
|
||||||
|
bool offload_params_to_cpu,
|
||||||
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
|
const std::string prefix = "model.diffusion_model")
|
||||||
|
: GGMLRunner(backend, offload_params_to_cpu) {
|
||||||
|
int64_t num_layers = 0;
|
||||||
|
std::string layer_tag = prefix + ".net.blocks.";
|
||||||
|
for (const auto& kv : tensor_storage_map) {
|
||||||
|
const std::string& tensor_name = kv.first;
|
||||||
|
size_t pos = tensor_name.find(layer_tag);
|
||||||
|
if (pos == std::string::npos) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
size_t start = pos + layer_tag.size();
|
||||||
|
size_t end = tensor_name.find('.', start);
|
||||||
|
if (end == std::string::npos) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int64_t layer_id = atoll(tensor_name.substr(start, end - start).c_str());
|
||||||
|
num_layers = std::max(num_layers, layer_id + 1);
|
||||||
|
}
|
||||||
|
if (num_layers <= 0) {
|
||||||
|
num_layers = 28;
|
||||||
|
}
|
||||||
|
LOG_INFO("anima net layers: %" PRId64, num_layers);
|
||||||
|
|
||||||
|
net = AnimaNet(num_layers);
|
||||||
|
net.init(params_ctx, tensor_storage_map, prefix + ".net");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_desc() override {
|
||||||
|
return "anima";
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
|
net.get_param_tensors(tensors, prefix + ".net");
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<float> gen_1d_rope_pe_vec(int64_t seq_len, int dim, float theta = 10000.f) {
|
||||||
|
std::vector<float> pos(seq_len);
|
||||||
|
for (int64_t i = 0; i < seq_len; i++) {
|
||||||
|
pos[i] = static_cast<float>(i);
|
||||||
|
}
|
||||||
|
auto rope_emb = Rope::rope(pos, dim, theta);
|
||||||
|
return Rope::flatten(rope_emb);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float calc_ntk_factor(float extrapolation_ratio, int axis_dim) {
|
||||||
|
if (extrapolation_ratio == 1.0f || axis_dim <= 2) {
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
return std::pow(extrapolation_ratio, static_cast<float>(axis_dim) / static_cast<float>(axis_dim - 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<float> gen_anima_image_pe_vec(int bs,
|
||||||
|
int h,
|
||||||
|
int w,
|
||||||
|
int patch_size,
|
||||||
|
int theta,
|
||||||
|
const std::vector<int>& axes_dim,
|
||||||
|
float h_extrapolation_ratio,
|
||||||
|
float w_extrapolation_ratio,
|
||||||
|
float t_extrapolation_ratio) {
|
||||||
|
static const std::vector<ggml_tensor*> empty_ref_latents;
|
||||||
|
auto ids = Rope::gen_flux_ids(h,
|
||||||
|
w,
|
||||||
|
patch_size,
|
||||||
|
bs,
|
||||||
|
static_cast<int>(axes_dim.size()),
|
||||||
|
0,
|
||||||
|
{},
|
||||||
|
empty_ref_latents,
|
||||||
|
false,
|
||||||
|
1.0f);
|
||||||
|
|
||||||
|
std::vector<float> axis_thetas = {
|
||||||
|
static_cast<float>(theta) * calc_ntk_factor(t_extrapolation_ratio, axes_dim[0]),
|
||||||
|
static_cast<float>(theta) * calc_ntk_factor(h_extrapolation_ratio, axes_dim[1]),
|
||||||
|
static_cast<float>(theta) * calc_ntk_factor(w_extrapolation_ratio, axes_dim[2]),
|
||||||
|
};
|
||||||
|
return Rope::embed_nd(ids, bs, axis_thetas, axes_dim);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_cgraph* build_graph(ggml_tensor* x,
|
||||||
|
ggml_tensor* timesteps,
|
||||||
|
ggml_tensor* context,
|
||||||
|
ggml_tensor* t5_ids = nullptr,
|
||||||
|
ggml_tensor* t5_weights = nullptr) {
|
||||||
|
GGML_ASSERT(x->ne[3] == 1);
|
||||||
|
ggml_cgraph* gf = new_graph_custom(ANIMA_GRAPH_SIZE);
|
||||||
|
|
||||||
|
x = to_backend(x);
|
||||||
|
timesteps = to_backend(timesteps);
|
||||||
|
context = to_backend(context);
|
||||||
|
t5_ids = to_backend(t5_ids);
|
||||||
|
t5_weights = to_backend(t5_weights);
|
||||||
|
|
||||||
|
int64_t pad_h = (net.patch_size - x->ne[1] % net.patch_size) % net.patch_size;
|
||||||
|
int64_t pad_w = (net.patch_size - x->ne[0] % net.patch_size) % net.patch_size;
|
||||||
|
int64_t h_pad = x->ne[1] + pad_h;
|
||||||
|
int64_t w_pad = x->ne[0] + pad_w;
|
||||||
|
|
||||||
|
image_pe_vec = gen_anima_image_pe_vec(1,
|
||||||
|
static_cast<int>(h_pad),
|
||||||
|
static_cast<int>(w_pad),
|
||||||
|
static_cast<int>(net.patch_size),
|
||||||
|
net.theta,
|
||||||
|
net.axes_dim,
|
||||||
|
4.0f,
|
||||||
|
4.0f,
|
||||||
|
1.0f);
|
||||||
|
int64_t image_pos_len = static_cast<int64_t>(image_pe_vec.size()) / (2 * 2 * (net.head_dim / 2));
|
||||||
|
auto image_pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, net.head_dim / 2, image_pos_len);
|
||||||
|
set_backend_tensor_data(image_pe, image_pe_vec.data());
|
||||||
|
|
||||||
|
ggml_tensor* adapter_q_pe = nullptr;
|
||||||
|
ggml_tensor* adapter_k_pe = nullptr;
|
||||||
|
if (t5_ids != nullptr) {
|
||||||
|
int64_t target_len = t5_ids->ne[0];
|
||||||
|
int64_t source_len = context->ne[1];
|
||||||
|
|
||||||
|
adapter_q_pe_vec = gen_1d_rope_pe_vec(target_len, 64, 10000.f);
|
||||||
|
adapter_k_pe_vec = gen_1d_rope_pe_vec(source_len, 64, 10000.f);
|
||||||
|
|
||||||
|
int64_t target_pos_len = static_cast<int64_t>(adapter_q_pe_vec.size()) / (2 * 2 * 32);
|
||||||
|
int64_t source_pos_len = static_cast<int64_t>(adapter_k_pe_vec.size()) / (2 * 2 * 32);
|
||||||
|
|
||||||
|
adapter_q_pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, 32, target_pos_len);
|
||||||
|
adapter_k_pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, 32, source_pos_len);
|
||||||
|
set_backend_tensor_data(adapter_q_pe, adapter_q_pe_vec.data());
|
||||||
|
set_backend_tensor_data(adapter_k_pe, adapter_k_pe_vec.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
auto runner_ctx = get_context();
|
||||||
|
auto out = net.forward(&runner_ctx,
|
||||||
|
x,
|
||||||
|
timesteps,
|
||||||
|
context,
|
||||||
|
image_pe,
|
||||||
|
t5_ids,
|
||||||
|
t5_weights,
|
||||||
|
adapter_q_pe,
|
||||||
|
adapter_k_pe);
|
||||||
|
|
||||||
|
ggml_build_forward_expand(gf, out);
|
||||||
|
return gf;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
|
ggml_tensor* x,
|
||||||
|
ggml_tensor* timesteps,
|
||||||
|
ggml_tensor* context,
|
||||||
|
ggml_tensor* t5_ids = nullptr,
|
||||||
|
ggml_tensor* t5_weights = nullptr,
|
||||||
|
ggml_tensor** output = nullptr,
|
||||||
|
ggml_context* output_ctx = nullptr) {
|
||||||
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
|
return build_graph(x, timesteps, context, t5_ids, t5_weights);
|
||||||
|
};
|
||||||
|
return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace Anima
|
||||||
|
|
||||||
|
#endif // __ANIMA_HPP__
|
||||||
@ -1,8 +1,7 @@
|
|||||||
#ifndef __VAE_HPP__
|
#ifndef __AUTO_ENCODER_KL_HPP__
|
||||||
#define __VAE_HPP__
|
#define __AUTO_ENCODER_KL_HPP__
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "vae.hpp"
|
||||||
#include "ggml_extend.hpp"
|
|
||||||
|
|
||||||
/*================================================== AutoEncoderKL ===================================================*/
|
/*================================================== AutoEncoderKL ===================================================*/
|
||||||
|
|
||||||
@ -30,7 +29,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) override {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) override {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
// t_emb is always None
|
// t_emb is always None
|
||||||
auto norm1 = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm1"]);
|
auto norm1 = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm1"]);
|
||||||
@ -66,7 +65,7 @@ protected:
|
|||||||
int64_t in_channels;
|
int64_t in_channels;
|
||||||
bool use_linear;
|
bool use_linear;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") {
|
||||||
auto iter = tensor_storage_map.find(prefix + "proj_out.weight");
|
auto iter = tensor_storage_map.find(prefix + "proj_out.weight");
|
||||||
if (iter != tensor_storage_map.end()) {
|
if (iter != tensor_storage_map.end()) {
|
||||||
if (iter->second.n_dims == 4 && use_linear) {
|
if (iter->second.n_dims == 4 && use_linear) {
|
||||||
@ -102,7 +101,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) override {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) override {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
auto norm = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm"]);
|
auto norm = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm"]);
|
||||||
auto q_proj = std::dynamic_pointer_cast<UnaryBlock>(blocks["q"]);
|
auto q_proj = std::dynamic_pointer_cast<UnaryBlock>(blocks["q"]);
|
||||||
@ -127,8 +126,6 @@ public:
|
|||||||
q = q_proj->forward(ctx, h_); // [N, h * w, in_channels]
|
q = q_proj->forward(ctx, h_); // [N, h * w, in_channels]
|
||||||
k = k_proj->forward(ctx, h_); // [N, h * w, in_channels]
|
k = k_proj->forward(ctx, h_); // [N, h * w, in_channels]
|
||||||
v = v_proj->forward(ctx, h_); // [N, h * w, in_channels]
|
v = v_proj->forward(ctx, h_); // [N, h * w, in_channels]
|
||||||
|
|
||||||
v = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, v, 1, 0, 2, 3)); // [N, in_channels, h * w]
|
|
||||||
} else {
|
} else {
|
||||||
q = q_proj->forward(ctx, h_); // [N, in_channels, h, w]
|
q = q_proj->forward(ctx, h_); // [N, in_channels, h, w]
|
||||||
q = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, q, 1, 2, 0, 3)); // [N, h, w, in_channels]
|
q = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, q, 1, 2, 0, 3)); // [N, h, w, in_channels]
|
||||||
@ -139,10 +136,11 @@ public:
|
|||||||
k = ggml_reshape_3d(ctx->ggml_ctx, k, c, h * w, n); // [N, h * w, in_channels]
|
k = ggml_reshape_3d(ctx->ggml_ctx, k, c, h * w, n); // [N, h * w, in_channels]
|
||||||
|
|
||||||
v = v_proj->forward(ctx, h_); // [N, in_channels, h, w]
|
v = v_proj->forward(ctx, h_); // [N, in_channels, h, w]
|
||||||
v = ggml_reshape_3d(ctx->ggml_ctx, v, h * w, c, n); // [N, in_channels, h * w]
|
v = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, v, 1, 2, 0, 3)); // [N, h, w, in_channels]
|
||||||
|
v = ggml_reshape_3d(ctx->ggml_ctx, v, c, h * w, n); // [N, h * w, in_channels]
|
||||||
}
|
}
|
||||||
|
|
||||||
h_ = ggml_ext_attention(ctx->ggml_ctx, q, k, v, false); // [N, h * w, in_channels]
|
h_ = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, 1, nullptr, false, ctx->flash_attn_enabled);
|
||||||
|
|
||||||
if (use_linear) {
|
if (use_linear) {
|
||||||
h_ = proj_out->forward(ctx, h_); // [N, h * w, in_channels]
|
h_ = proj_out->forward(ctx, h_); // [N, h * w, in_channels]
|
||||||
@ -166,27 +164,27 @@ public:
|
|||||||
AE3DConv(int64_t in_channels,
|
AE3DConv(int64_t in_channels,
|
||||||
int64_t out_channels,
|
int64_t out_channels,
|
||||||
std::pair<int, int> kernel_size,
|
std::pair<int, int> kernel_size,
|
||||||
int64_t video_kernel_size = 3,
|
int video_kernel_size = 3,
|
||||||
std::pair<int, int> stride = {1, 1},
|
std::pair<int, int> stride = {1, 1},
|
||||||
std::pair<int, int> padding = {0, 0},
|
std::pair<int, int> padding = {0, 0},
|
||||||
std::pair<int, int> dilation = {1, 1},
|
std::pair<int, int> dilation = {1, 1},
|
||||||
bool bias = true)
|
bool bias = true)
|
||||||
: Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias) {
|
: Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias) {
|
||||||
int64_t kernel_padding = video_kernel_size / 2;
|
int kernel_padding = video_kernel_size / 2;
|
||||||
blocks["time_mix_conv"] = std::shared_ptr<GGMLBlock>(new Conv3dnx1x1(out_channels,
|
blocks["time_mix_conv"] = std::shared_ptr<GGMLBlock>(new Conv3d(out_channels,
|
||||||
out_channels,
|
out_channels,
|
||||||
video_kernel_size,
|
{video_kernel_size, 1, 1},
|
||||||
1,
|
{1, 1, 1},
|
||||||
kernel_padding));
|
{kernel_padding, 0, 0}));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x) override {
|
ggml_tensor* x) override {
|
||||||
// timesteps always None
|
// timesteps always None
|
||||||
// skip_video always False
|
// skip_video always False
|
||||||
// x: [N, IC, IH, IW]
|
// x: [N, IC, IH, IW]
|
||||||
// result: [N, OC, OH, OW]
|
// result: [N, OC, OH, OW]
|
||||||
auto time_mix_conv = std::dynamic_pointer_cast<Conv3dnx1x1>(blocks["time_mix_conv"]);
|
auto time_mix_conv = std::dynamic_pointer_cast<Conv3d>(blocks["time_mix_conv"]);
|
||||||
|
|
||||||
x = Conv2d::forward(ctx, x);
|
x = Conv2d::forward(ctx, x);
|
||||||
// timesteps = x.shape[0]
|
// timesteps = x.shape[0]
|
||||||
@ -210,7 +208,7 @@ public:
|
|||||||
|
|
||||||
class VideoResnetBlock : public ResnetBlock {
|
class VideoResnetBlock : public ResnetBlock {
|
||||||
protected:
|
protected:
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
||||||
enum ggml_type wtype = get_type(prefix + "mix_factor", tensor_storage_map, GGML_TYPE_F32);
|
enum ggml_type wtype = get_type(prefix + "mix_factor", tensor_storage_map, GGML_TYPE_F32);
|
||||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||||
}
|
}
|
||||||
@ -229,7 +227,7 @@ public:
|
|||||||
blocks["time_stack"] = std::shared_ptr<GGMLBlock>(new ResBlock(out_channels, 0, out_channels, {video_kernel_size, 1}, 3, false, true));
|
blocks["time_stack"] = std::shared_ptr<GGMLBlock>(new ResBlock(out_channels, 0, out_channels, {video_kernel_size, 1}, 3, false, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) override {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) override {
|
||||||
// x: [N, in_channels, h, w] aka [b*t, in_channels, h, w]
|
// x: [N, in_channels, h, w] aka [b*t, in_channels, h, w]
|
||||||
// return: [N, out_channels, h, w] aka [b*t, out_channels, h, w]
|
// return: [N, out_channels, h, w] aka [b*t, out_channels, h, w]
|
||||||
// t_emb is always None
|
// t_emb is always None
|
||||||
@ -254,8 +252,8 @@ public:
|
|||||||
|
|
||||||
float alpha = get_alpha();
|
float alpha = get_alpha();
|
||||||
x = ggml_add(ctx->ggml_ctx,
|
x = ggml_add(ctx->ggml_ctx,
|
||||||
ggml_scale(ctx->ggml_ctx, x, alpha),
|
ggml_ext_scale(ctx->ggml_ctx, x, alpha),
|
||||||
ggml_scale(ctx->ggml_ctx, x_mix, 1.0f - alpha));
|
ggml_ext_scale(ctx->ggml_ctx, x_mix, 1.0f - alpha));
|
||||||
|
|
||||||
x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w)
|
x = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x, 0, 2, 1, 3)); // b c t (h w) -> b t c (h w)
|
||||||
x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w
|
x = ggml_reshape_4d(ctx->ggml_ctx, x, W, H, C, T * B); // b t c (h w) -> (b t) c h w
|
||||||
@ -319,7 +317,7 @@ public:
|
|||||||
blocks["conv_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(block_in, double_z ? z_channels * 2 : z_channels, {3, 3}, {1, 1}, {1, 1}));
|
blocks["conv_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(block_in, double_z ? z_channels * 2 : z_channels, {3, 3}, {1, 1}, {1, 1}));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
virtual ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
|
|
||||||
auto conv_in = std::dynamic_pointer_cast<Conv2d>(blocks["conv_in"]);
|
auto conv_in = std::dynamic_pointer_cast<Conv2d>(blocks["conv_in"]);
|
||||||
@ -409,7 +407,7 @@ public:
|
|||||||
z_channels(z_channels),
|
z_channels(z_channels),
|
||||||
video_decoder(video_decoder),
|
video_decoder(video_decoder),
|
||||||
video_kernel_size(video_kernel_size) {
|
video_kernel_size(video_kernel_size) {
|
||||||
size_t num_resolutions = ch_mult.size();
|
int num_resolutions = static_cast<int>(ch_mult.size());
|
||||||
int block_in = ch * ch_mult[num_resolutions - 1];
|
int block_in = ch * ch_mult[num_resolutions - 1];
|
||||||
|
|
||||||
blocks["conv_in"] = std::shared_ptr<GGMLBlock>(new Conv2d(z_channels, block_in, {3, 3}, {1, 1}, {1, 1}));
|
blocks["conv_in"] = std::shared_ptr<GGMLBlock>(new Conv2d(z_channels, block_in, {3, 3}, {1, 1}, {1, 1}));
|
||||||
@ -437,7 +435,7 @@ public:
|
|||||||
blocks["conv_out"] = get_conv_out(block_in, out_ch, {3, 3}, {1, 1}, {1, 1});
|
blocks["conv_out"] = get_conv_out(block_in, out_ch, {3, 3}, {1, 1}, {1, 1});
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* z) {
|
virtual ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* z) {
|
||||||
// z: [N, z_channels, h, w]
|
// z: [N, z_channels, h, w]
|
||||||
// alpha is always 0
|
// alpha is always 0
|
||||||
// merge_strategy is always learned
|
// merge_strategy is always learned
|
||||||
@ -461,7 +459,7 @@ public:
|
|||||||
h = mid_block_2->forward(ctx, h); // [N, block_in, h, w]
|
h = mid_block_2->forward(ctx, h); // [N, block_in, h, w]
|
||||||
|
|
||||||
// upsampling
|
// upsampling
|
||||||
size_t num_resolutions = ch_mult.size();
|
int num_resolutions = static_cast<int>(ch_mult.size());
|
||||||
for (int i = num_resolutions - 1; i >= 0; i--) {
|
for (int i = num_resolutions - 1; i >= 0; i--) {
|
||||||
for (int j = 0; j < num_res_blocks + 1; j++) {
|
for (int j = 0; j < num_res_blocks + 1; j++) {
|
||||||
std::string name = "up." + std::to_string(i) + ".block." + std::to_string(j);
|
std::string name = "up." + std::to_string(i) + ".block." + std::to_string(j);
|
||||||
@ -485,8 +483,9 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
// ldm.models.autoencoder.AutoencoderKL
|
// ldm.models.autoencoder.AutoencoderKL
|
||||||
class AutoencodingEngine : public GGMLBlock {
|
class AutoEncoderKLModel : public GGMLBlock {
|
||||||
protected:
|
protected:
|
||||||
|
SDVersion version;
|
||||||
bool decode_only = true;
|
bool decode_only = true;
|
||||||
bool use_video_decoder = false;
|
bool use_video_decoder = false;
|
||||||
bool use_quant = true;
|
bool use_quant = true;
|
||||||
@ -503,14 +502,19 @@ protected:
|
|||||||
} dd_config;
|
} dd_config;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AutoencodingEngine(SDVersion version = VERSION_SD1,
|
AutoEncoderKLModel(SDVersion version = VERSION_SD1,
|
||||||
bool decode_only = true,
|
bool decode_only = true,
|
||||||
bool use_linear_projection = false,
|
bool use_linear_projection = false,
|
||||||
bool use_video_decoder = false)
|
bool use_video_decoder = false)
|
||||||
: decode_only(decode_only), use_video_decoder(use_video_decoder) {
|
: version(version), decode_only(decode_only), use_video_decoder(use_video_decoder) {
|
||||||
if (sd_version_is_dit(version)) {
|
if (sd_version_is_dit(version)) {
|
||||||
dd_config.z_channels = 16;
|
if (sd_version_is_flux2(version)) {
|
||||||
|
dd_config.z_channels = 32;
|
||||||
|
embed_dim = 32;
|
||||||
|
} else {
|
||||||
use_quant = false;
|
use_quant = false;
|
||||||
|
dd_config.z_channels = 16;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (use_video_decoder) {
|
if (use_video_decoder) {
|
||||||
use_quant = false;
|
use_quant = false;
|
||||||
@ -545,8 +549,26 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* decode(GGMLRunnerContext* ctx, struct ggml_tensor* z) {
|
ggml_tensor* decode(GGMLRunnerContext* ctx, ggml_tensor* z) {
|
||||||
// z: [N, z_channels, h, w]
|
// z: [N, z_channels, h, w]
|
||||||
|
if (sd_version_is_flux2(version)) {
|
||||||
|
// [N, C*p*p, h, w] -> [N, C, h*p, w*p]
|
||||||
|
int64_t p = 2;
|
||||||
|
|
||||||
|
int64_t N = z->ne[3];
|
||||||
|
int64_t C = z->ne[2] / p / p;
|
||||||
|
int64_t h = z->ne[1];
|
||||||
|
int64_t w = z->ne[0];
|
||||||
|
int64_t H = h * p;
|
||||||
|
int64_t W = w * p;
|
||||||
|
|
||||||
|
z = ggml_reshape_4d(ctx->ggml_ctx, z, w * h, p * p, C, N); // [N, C, p*p, h*w]
|
||||||
|
z = ggml_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, z, 1, 0, 2, 3)); // [N, C, h*w, p*p]
|
||||||
|
z = ggml_reshape_4d(ctx->ggml_ctx, z, p, p, w, h * C * N); // [N*C*h, w, p, p]
|
||||||
|
z = ggml_cont(ctx->ggml_ctx, ggml_ext_torch_permute(ctx->ggml_ctx, z, 0, 2, 1, 3)); // [N*C*h, p, w, p]
|
||||||
|
z = ggml_reshape_4d(ctx->ggml_ctx, z, W, H, C, N); // [N, C, h*p, w*p]
|
||||||
|
}
|
||||||
|
|
||||||
if (use_quant) {
|
if (use_quant) {
|
||||||
auto post_quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["post_quant_conv"]);
|
auto post_quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["post_quant_conv"]);
|
||||||
z = post_quant_conv->forward(ctx, z); // [N, z_channels, h, w]
|
z = post_quant_conv->forward(ctx, z); // [N, z_channels, h, w]
|
||||||
@ -559,58 +581,50 @@ public:
|
|||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* encode(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* encode(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
auto encoder = std::dynamic_pointer_cast<Encoder>(blocks["encoder"]);
|
auto encoder = std::dynamic_pointer_cast<Encoder>(blocks["encoder"]);
|
||||||
|
|
||||||
auto h = encoder->forward(ctx, x); // [N, 2*z_channels, h/8, w/8]
|
auto z = encoder->forward(ctx, x); // [N, 2*z_channels, h/8, w/8]
|
||||||
if (use_quant) {
|
if (use_quant) {
|
||||||
auto quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["quant_conv"]);
|
auto quant_conv = std::dynamic_pointer_cast<Conv2d>(blocks["quant_conv"]);
|
||||||
h = quant_conv->forward(ctx, h); // [N, 2*embed_dim, h/8, w/8]
|
z = quant_conv->forward(ctx, z); // [N, 2*embed_dim, h/8, w/8]
|
||||||
}
|
}
|
||||||
return h;
|
if (sd_version_is_flux2(version)) {
|
||||||
}
|
z = ggml_ext_chunk(ctx->ggml_ctx, z, 2, 2)[0];
|
||||||
};
|
|
||||||
|
|
||||||
struct VAE : public GGMLRunner {
|
// [N, C, H, W] -> [N, C*p*p, H/p, W/p]
|
||||||
VAE(ggml_backend_t backend, bool offload_params_to_cpu)
|
int64_t p = 2;
|
||||||
: GGMLRunner(backend, offload_params_to_cpu) {}
|
int64_t N = z->ne[3];
|
||||||
virtual void compute(const int n_threads,
|
int64_t C = z->ne[2];
|
||||||
struct ggml_tensor* z,
|
int64_t H = z->ne[1];
|
||||||
bool decode_graph,
|
int64_t W = z->ne[0];
|
||||||
struct ggml_tensor** output,
|
int64_t h = H / p;
|
||||||
struct ggml_context* output_ctx) = 0;
|
int64_t w = W / p;
|
||||||
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) = 0;
|
|
||||||
virtual void set_conv2d_scale(float scale) { SD_UNUSED(scale); };
|
|
||||||
};
|
|
||||||
|
|
||||||
struct FakeVAE : public VAE {
|
z = ggml_reshape_4d(ctx->ggml_ctx, z, p, w, p, h * C * N); // [N*C*h, p, w, p]
|
||||||
FakeVAE(ggml_backend_t backend, bool offload_params_to_cpu)
|
z = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, z, 0, 2, 1, 3)); // [N*C*h, w, p, p]
|
||||||
: VAE(backend, offload_params_to_cpu) {}
|
z = ggml_reshape_4d(ctx->ggml_ctx, z, p * p, w * h, C, N); // [N, C, h*w, p*p]
|
||||||
void compute(const int n_threads,
|
z = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, z, 1, 0, 2, 3)); // [N, C, p*p, h*w]
|
||||||
struct ggml_tensor* z,
|
z = ggml_reshape_4d(ctx->ggml_ctx, z, w, h, p * p * C, N); // [N, C*p*p, h, w]
|
||||||
bool decode_graph,
|
|
||||||
struct ggml_tensor** output,
|
|
||||||
struct ggml_context* output_ctx) override {
|
|
||||||
if (*output == nullptr && output_ctx != nullptr) {
|
|
||||||
*output = ggml_dup_tensor(output_ctx, z);
|
|
||||||
}
|
}
|
||||||
ggml_ext_tensor_iter(z, [&](ggml_tensor* z, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
return z;
|
||||||
float value = ggml_ext_tensor_get_f32(z, i0, i1, i2, i3);
|
|
||||||
ggml_ext_tensor_set_f32(*output, value, i0, i1, i2, i3);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) override {}
|
int get_encoder_output_channels() {
|
||||||
|
int factor = dd_config.double_z ? 2 : 1;
|
||||||
std::string get_desc() override {
|
if (sd_version_is_flux2(version)) {
|
||||||
return "fake_vae";
|
return dd_config.z_channels * 4;
|
||||||
|
}
|
||||||
|
return dd_config.z_channels * factor;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AutoEncoderKL : public VAE {
|
struct AutoEncoderKL : public VAE {
|
||||||
|
float scale_factor = 1.f;
|
||||||
|
float shift_factor = 0.f;
|
||||||
bool decode_only = true;
|
bool decode_only = true;
|
||||||
AutoencodingEngine ae;
|
AutoEncoderKLModel ae;
|
||||||
|
|
||||||
AutoEncoderKL(ggml_backend_t backend,
|
AutoEncoderKL(ggml_backend_t backend,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
@ -619,7 +633,23 @@ struct AutoEncoderKL : public VAE {
|
|||||||
bool decode_only = false,
|
bool decode_only = false,
|
||||||
bool use_video_decoder = false,
|
bool use_video_decoder = false,
|
||||||
SDVersion version = VERSION_SD1)
|
SDVersion version = VERSION_SD1)
|
||||||
: decode_only(decode_only), VAE(backend, offload_params_to_cpu) {
|
: decode_only(decode_only), VAE(version, backend, offload_params_to_cpu) {
|
||||||
|
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
|
||||||
|
scale_factor = 0.18215f;
|
||||||
|
shift_factor = 0.f;
|
||||||
|
} else if (sd_version_is_sdxl(version)) {
|
||||||
|
scale_factor = 0.13025f;
|
||||||
|
shift_factor = 0.f;
|
||||||
|
} else if (sd_version_is_sd3(version)) {
|
||||||
|
scale_factor = 1.5305f;
|
||||||
|
shift_factor = 0.0609f;
|
||||||
|
} else if (sd_version_is_flux(version) || sd_version_is_z_image(version)) {
|
||||||
|
scale_factor = 0.3611f;
|
||||||
|
shift_factor = 0.1159f;
|
||||||
|
} else if (sd_version_is_flux2(version)) {
|
||||||
|
scale_factor = 1.0f;
|
||||||
|
shift_factor = 0.f;
|
||||||
|
}
|
||||||
bool use_linear_projection = false;
|
bool use_linear_projection = false;
|
||||||
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
for (const auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
if (!starts_with(name, prefix)) {
|
if (!starts_with(name, prefix)) {
|
||||||
@ -632,7 +662,7 @@ struct AutoEncoderKL : public VAE {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ae = AutoencodingEngine(version, decode_only, use_linear_projection, use_video_decoder);
|
ae = AutoEncoderKLModel(version, decode_only, use_linear_projection, use_video_decoder);
|
||||||
ae.init(params_ctx, tensor_storage_map, prefix);
|
ae.init(params_ctx, tensor_storage_map, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -651,45 +681,215 @@ struct AutoEncoderKL : public VAE {
|
|||||||
return "vae";
|
return "vae";
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) override {
|
||||||
ae.get_param_tensors(tensors, prefix);
|
ae.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph(struct ggml_tensor* z, bool decode_graph) {
|
ggml_cgraph* build_graph(ggml_tensor* z, bool decode_graph) {
|
||||||
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
||||||
|
|
||||||
z = to_backend(z);
|
z = to_backend(z);
|
||||||
|
|
||||||
auto runner_ctx = get_context();
|
auto runner_ctx = get_context();
|
||||||
|
|
||||||
struct ggml_tensor* out = decode_graph ? ae.decode(&runner_ctx, z) : ae.encode(&runner_ctx, z);
|
ggml_tensor* out = decode_graph ? ae.decode(&runner_ctx, z) : ae.encode(&runner_ctx, z);
|
||||||
|
|
||||||
ggml_build_forward_expand(gf, out);
|
ggml_build_forward_expand(gf, out);
|
||||||
|
|
||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(const int n_threads,
|
bool _compute(const int n_threads,
|
||||||
struct ggml_tensor* z,
|
ggml_tensor* z,
|
||||||
bool decode_graph,
|
bool decode_graph,
|
||||||
struct ggml_tensor** output,
|
ggml_tensor** output,
|
||||||
struct ggml_context* output_ctx = nullptr) override {
|
ggml_context* output_ctx = nullptr) override {
|
||||||
GGML_ASSERT(!decode_only || decode_graph);
|
GGML_ASSERT(!decode_only || decode_graph);
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_graph(z, decode_graph);
|
return build_graph(z, decode_graph);
|
||||||
};
|
};
|
||||||
// ggml_set_f32(z, 0.5f);
|
// ggml_set_f32(z, 0.5f);
|
||||||
// print_ggml_tensor(z);
|
// print_ggml_tensor(z);
|
||||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* gaussian_latent_sample(ggml_context* work_ctx, ggml_tensor* moments, std::shared_ptr<RNG> rng) {  // returns a new tensor holding mean + std * noise, element by element, built from `moments`
|
||||||
|
// ldm.modules.distributions.distributions.DiagonalGaussianDistribution.sample
|
||||||
|
ggml_tensor* latents = ggml_new_tensor_4d(work_ctx, moments->type, moments->ne[0], moments->ne[1], moments->ne[2] / 2, moments->ne[3]);  // same type and extents as moments, except dim 2 is halved
|
||||||
|
ggml_tensor* noise = ggml_dup_tensor(work_ctx, latents);  // noise buffer modelled on latents
|
||||||
|
ggml_ext_im_set_randn_f32(noise, rng);  // fills noise using rng; presumably standard-normal samples (helper not visible here) -- TODO confirm
|
||||||
|
{
|
||||||
|
float mean = 0;
|
||||||
|
float logvar = 0;
|
||||||
|
float value = 0;
|
||||||
|
float std_ = 0;
|
||||||
|
for (int i = 0; i < latents->ne[3]; i++) {
|
||||||
|
for (int j = 0; j < latents->ne[2]; j++) {
|
||||||
|
for (int k = 0; k < latents->ne[1]; k++) {
|
||||||
|
for (int l = 0; l < latents->ne[0]; l++) {
|
||||||
|
mean = ggml_ext_tensor_get_f32(moments, l, k, j, i);  // means occupy the first half of moments' dim 2
|
||||||
|
logvar = ggml_ext_tensor_get_f32(moments, l, k, j + (int)latents->ne[2], i);  // matching log-variance sits latents->ne[2] entries further along dim 2
|
||||||
|
logvar = std::max(-30.0f, std::min(logvar, 20.0f));  // clamp logvar to [-30, 20]
|
||||||
|
std_ = std::exp(0.5f * logvar);  // std = exp(logvar / 2)
|
||||||
|
value = mean + std_ * ggml_ext_tensor_get_f32(noise, l, k, j, i);
|
||||||
|
// printf("%d %d %d %d -> %f\n", i, j, k, l, value);
|
||||||
|
ggml_ext_tensor_set_f32(latents, value, l, k, j, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return latents;
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* vae_output_to_latents(ggml_context* work_ctx, ggml_tensor* vae_output, std::shared_ptr<RNG> rng) {  // turns encoder output into latents; the rule depends on `version`
|
||||||
|
if (sd_version_is_flux2(version)) {
|
||||||
|
return vae_output;  // flux2: output is returned unchanged, no sampling
|
||||||
|
||||||
|
} else if (version == VERSION_SD1_PIX2PIX) {
|
||||||
|
return ggml_view_3d(work_ctx,  // pix2pix: a view onto the first half of dim 2, no sampling
|
||||||
|
vae_output,
|
||||||
|
||||||
|
vae_output->ne[0],
|
||||||
|
||||||
|
vae_output->ne[1],
|
||||||
|
||||||
|
vae_output->ne[2] / 2,
|
||||||
|
||||||
|
vae_output->nb[1],
|
||||||
|
||||||
|
vae_output->nb[2],
|
||||||
|
||||||
|
0);  // offset 0; the view is 3-D, so vae_output->ne[3] is not carried over
|
||||||
|
} else {
|
||||||
|
return gaussian_latent_sample(work_ctx, vae_output, rng);  // every other version: sample from the (mean, logvar) moments
|
||||||
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_latents_mean_std_vec(ggml_tensor* latents, int channel_dim, std::vector<float>& latents_mean_vec, std::vector<float>& latents_std_vec) {  // overwrites both output vectors with hard-coded per-channel constants; aborts for any version other than flux2
|
||||||
|
// flux2
|
||||||
|
if (sd_version_is_flux2(version)) {
|
||||||
|
GGML_ASSERT(latents->ne[channel_dim] == 128);  // the tables below hold 128 entries each (16 rows of 8)
|
||||||
|
latents_mean_vec = {-0.0676f, -0.0715f, -0.0753f, -0.0745f, 0.0223f, 0.0180f, 0.0142f, 0.0184f,
|
||||||
|
-0.0001f, -0.0063f, -0.0002f, -0.0031f, -0.0272f, -0.0281f, -0.0276f, -0.0290f,
|
||||||
|
-0.0769f, -0.0672f, -0.0902f, -0.0892f, 0.0168f, 0.0152f, 0.0079f, 0.0086f,
|
||||||
|
0.0083f, 0.0015f, 0.0003f, -0.0043f, -0.0439f, -0.0419f, -0.0438f, -0.0431f,
|
||||||
|
-0.0102f, -0.0132f, -0.0066f, -0.0048f, -0.0311f, -0.0306f, -0.0279f, -0.0180f,
|
||||||
|
0.0030f, 0.0015f, 0.0126f, 0.0145f, 0.0347f, 0.0338f, 0.0337f, 0.0283f,
|
||||||
|
0.0020f, 0.0047f, 0.0047f, 0.0050f, 0.0123f, 0.0081f, 0.0081f, 0.0146f,
|
||||||
|
0.0681f, 0.0679f, 0.0767f, 0.0732f, -0.0462f, -0.0474f, -0.0392f, -0.0511f,
|
||||||
|
-0.0528f, -0.0477f, -0.0470f, -0.0517f, -0.0317f, -0.0316f, -0.0345f, -0.0283f,
|
||||||
|
0.0510f, 0.0445f, 0.0578f, 0.0458f, -0.0412f, -0.0458f, -0.0487f, -0.0467f,
|
||||||
|
-0.0088f, -0.0106f, -0.0088f, -0.0046f, -0.0376f, -0.0432f, -0.0436f, -0.0499f,
|
||||||
|
0.0118f, 0.0166f, 0.0203f, 0.0279f, 0.0113f, 0.0129f, 0.0016f, 0.0072f,
|
||||||
|
-0.0118f, -0.0018f, -0.0141f, -0.0054f, -0.0091f, -0.0138f, -0.0145f, -0.0187f,
|
||||||
|
0.0323f, 0.0305f, 0.0259f, 0.0300f, 0.0540f, 0.0614f, 0.0495f, 0.0590f,
|
||||||
|
-0.0511f, -0.0603f, -0.0478f, -0.0524f, -0.0227f, -0.0274f, -0.0154f, -0.0255f,
|
||||||
|
-0.0572f, -0.0565f, -0.0518f, -0.0496f, 0.0116f, 0.0054f, 0.0163f, 0.0104f};
|
||||||
|
latents_std_vec = {
|
||||||
|
1.8029f, 1.7786f, 1.7868f, 1.7837f, 1.7717f, 1.7590f, 1.7610f, 1.7479f,
|
||||||
|
1.7336f, 1.7373f, 1.7340f, 1.7343f, 1.8626f, 1.8527f, 1.8629f, 1.8589f,
|
||||||
|
1.7593f, 1.7526f, 1.7556f, 1.7583f, 1.7363f, 1.7400f, 1.7355f, 1.7394f,
|
||||||
|
1.7342f, 1.7246f, 1.7392f, 1.7304f, 1.7551f, 1.7513f, 1.7559f, 1.7488f,
|
||||||
|
1.8449f, 1.8454f, 1.8550f, 1.8535f, 1.8240f, 1.7813f, 1.7854f, 1.7945f,
|
||||||
|
1.8047f, 1.7876f, 1.7695f, 1.7676f, 1.7782f, 1.7667f, 1.7925f, 1.7848f,
|
||||||
|
1.7579f, 1.7407f, 1.7483f, 1.7368f, 1.7961f, 1.7998f, 1.7920f, 1.7925f,
|
||||||
|
1.7780f, 1.7747f, 1.7727f, 1.7749f, 1.7526f, 1.7447f, 1.7657f, 1.7495f,
|
||||||
|
1.7775f, 1.7720f, 1.7813f, 1.7813f, 1.8162f, 1.8013f, 1.8023f, 1.8033f,
|
||||||
|
1.7527f, 1.7331f, 1.7563f, 1.7482f, 1.7610f, 1.7507f, 1.7681f, 1.7613f,
|
||||||
|
1.7665f, 1.7545f, 1.7828f, 1.7726f, 1.7896f, 1.7999f, 1.7864f, 1.7760f,
|
||||||
|
1.7613f, 1.7625f, 1.7560f, 1.7577f, 1.7783f, 1.7671f, 1.7810f, 1.7799f,
|
||||||
|
1.7201f, 1.7068f, 1.7265f, 1.7091f, 1.7793f, 1.7578f, 1.7502f, 1.7455f,
|
||||||
|
1.7587f, 1.7500f, 1.7525f, 1.7362f, 1.7616f, 1.7572f, 1.7444f, 1.7430f,
|
||||||
|
1.7509f, 1.7610f, 1.7634f, 1.7612f, 1.7254f, 1.7135f, 1.7321f, 1.7226f,
|
||||||
|
1.7664f, 1.7624f, 1.7718f, 1.7664f, 1.7457f, 1.7441f, 1.7569f, 1.7530f};
|
||||||
|
} else {
|
||||||
|
GGML_ABORT("unknown version %d", version);  // no table exists for other versions
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* diffusion_to_vae_latents(ggml_context* work_ctx, ggml_tensor* latents) {  // returns a rescaled counterpart of `latents`; `latents` itself is only read
|
||||||
|
ggml_tensor* vae_latents = ggml_dup(work_ctx, latents);  // result tensor; every element is written by the loops below
|
||||||
|
if (sd_version_is_flux2(version)) {
|
||||||
|
int channel_dim = 2;  // per-channel statistics are indexed along dim 2
|
||||||
|
std::vector<float> latents_mean_vec;
|
||||||
|
std::vector<float> latents_std_vec;
|
||||||
|
get_latents_mean_std_vec(latents, channel_dim, latents_mean_vec, latents_std_vec);
|
||||||
|
|
||||||
|
float mean;
|
||||||
|
float std_;
|
||||||
|
for (int i = 0; i < latents->ne[3]; i++) {
|
||||||
|
if (channel_dim == 3) {  // never taken here: channel_dim is fixed to 2 above
|
||||||
|
mean = latents_mean_vec[i];
|
||||||
|
std_ = latents_std_vec[i];
|
||||||
|
}
|
||||||
|
for (int j = 0; j < latents->ne[2]; j++) {
|
||||||
|
if (channel_dim == 2) {  // always taken: mean/std_ are set before the first read below
|
||||||
|
mean = latents_mean_vec[j];
|
||||||
|
std_ = latents_std_vec[j];
|
||||||
|
}
|
||||||
|
for (int k = 0; k < latents->ne[1]; k++) {
|
||||||
|
for (int l = 0; l < latents->ne[0]; l++) {
|
||||||
|
float value = ggml_ext_tensor_get_f32(latents, l, k, j, i);
|
||||||
|
value = value * std_ / scale_factor + mean;  // algebraic inverse of (value - mean) * scale_factor / std_
|
||||||
|
ggml_ext_tensor_set_f32(vae_latents, value, l, k, j, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ggml_ext_tensor_iter(latents, [&](ggml_tensor* latents, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||||
|
float value = ggml_ext_tensor_get_f32(latents, i0, i1, i2, i3);
|
||||||
|
value = (value / scale_factor) + shift_factor;  // one global scale/shift for all elements
|
||||||
|
ggml_ext_tensor_set_f32(vae_latents, value, i0, i1, i2, i3);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return vae_latents;
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tensor* vae_to_diffuison_latents(ggml_context* work_ctx, ggml_tensor* latents) {  // returns a rescaled counterpart of `latents`; NOTE(review): "diffuison" misspells "diffusion" -- renaming would need every caller updated
|
||||||
|
ggml_tensor* diffusion_latents = ggml_dup(work_ctx, latents);  // result tensor; every element is written by the loops below
|
||||||
|
if (sd_version_is_flux2(version)) {
|
||||||
|
int channel_dim = 2;  // per-channel statistics are indexed along dim 2
|
||||||
|
std::vector<float> latents_mean_vec;
|
||||||
|
std::vector<float> latents_std_vec;
|
||||||
|
get_latents_mean_std_vec(latents, channel_dim, latents_mean_vec, latents_std_vec);
|
||||||
|
|
||||||
|
float mean;
|
||||||
|
float std_;
|
||||||
|
for (int i = 0; i < latents->ne[3]; i++) {
|
||||||
|
if (channel_dim == 3) {  // never taken here: channel_dim is fixed to 2 above
|
||||||
|
mean = latents_mean_vec[i];
|
||||||
|
std_ = latents_std_vec[i];
|
||||||
|
}
|
||||||
|
for (int j = 0; j < latents->ne[2]; j++) {
|
||||||
|
if (channel_dim == 2) {  // always taken: mean/std_ are set before the first read below
|
||||||
|
mean = latents_mean_vec[j];
|
||||||
|
std_ = latents_std_vec[j];
|
||||||
|
}
|
||||||
|
for (int k = 0; k < latents->ne[1]; k++) {
|
||||||
|
for (int l = 0; l < latents->ne[0]; l++) {
|
||||||
|
float value = ggml_ext_tensor_get_f32(latents, l, k, j, i);
|
||||||
|
value = (value - mean) * scale_factor / std_;  // subtract the channel mean, then scale by scale_factor / std_
|
||||||
|
ggml_ext_tensor_set_f32(diffusion_latents, value, l, k, j, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ggml_ext_tensor_iter(latents, [&](ggml_tensor* latents, int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
|
||||||
|
float value = ggml_ext_tensor_get_f32(latents, i0, i1, i2, i3);
|
||||||
|
value = (value - shift_factor) * scale_factor;  // one global shift/scale for all elements
|
||||||
|
ggml_ext_tensor_set_f32(diffusion_latents, value, i0, i1, i2, i3);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return diffusion_latents;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_encoder_output_channels(int input_channels) {  // input_channels is not used; the result comes entirely from ae
|
||||||
|
return ae.get_encoder_output_channels();
|
||||||
}
|
}
|
||||||
|
|
||||||
void test() {
|
void test() {
|
||||||
struct ggml_init_params params;
|
ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||||
params.mem_buffer = nullptr;
|
params.mem_buffer = nullptr;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
|
|
||||||
struct ggml_context* work_ctx = ggml_init(params);
|
ggml_context* work_ctx = ggml_init(params);
|
||||||
GGML_ASSERT(work_ctx != nullptr);
|
GGML_ASSERT(work_ctx != nullptr);
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -700,14 +900,14 @@ struct AutoEncoderKL : public VAE {
|
|||||||
auto x = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 64, 64, 3, 2);
|
auto x = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 64, 64, 3, 2);
|
||||||
ggml_set_f32(x, 0.5f);
|
ggml_set_f32(x, 0.5f);
|
||||||
print_ggml_tensor(x);
|
print_ggml_tensor(x);
|
||||||
struct ggml_tensor* out = nullptr;
|
ggml_tensor* out = nullptr;
|
||||||
|
|
||||||
int t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
compute(8, x, false, &out, work_ctx);
|
_compute(8, x, false, &out, work_ctx);
|
||||||
int t1 = ggml_time_ms();
|
int64_t t1 = ggml_time_ms();
|
||||||
|
|
||||||
print_ggml_tensor(out);
|
print_ggml_tensor(out);
|
||||||
LOG_DEBUG("encode test done in %dms", t1 - t0);
|
LOG_DEBUG("encode test done in %lldms", t1 - t0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (false) {
|
if (false) {
|
||||||
@ -718,16 +918,16 @@ struct AutoEncoderKL : public VAE {
|
|||||||
auto z = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 4, 1);
|
auto z = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 8, 8, 4, 1);
|
||||||
ggml_set_f32(z, 0.5f);
|
ggml_set_f32(z, 0.5f);
|
||||||
print_ggml_tensor(z);
|
print_ggml_tensor(z);
|
||||||
struct ggml_tensor* out = nullptr;
|
ggml_tensor* out = nullptr;
|
||||||
|
|
||||||
int t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
compute(8, z, true, &out, work_ctx);
|
_compute(8, z, true, &out, work_ctx);
|
||||||
int t1 = ggml_time_ms();
|
int64_t t1 = ggml_time_ms();
|
||||||
|
|
||||||
print_ggml_tensor(out);
|
print_ggml_tensor(out);
|
||||||
LOG_DEBUG("decode test done in %dms", t1 - t0);
|
LOG_DEBUG("decode test done in %lldms", t1 - t0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif // __AUTO_ENCODER_KL_HPP__
|
||||||
894
src/cache_dit.hpp
Normal file
@ -0,0 +1,894 @@
|
|||||||
|
#ifndef __CACHE_DIT_HPP__
|
||||||
|
#define __CACHE_DIT_HPP__
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <limits>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "ggml_extend.hpp"
|
||||||
|
|
||||||
|
/// Tunables for the DBCache residual-caching policy.
struct DBCacheConfig {
    /// Master switch for DBCache.
    bool enabled{false};
    /// Default number of leading ("Fn") blocks; block indices below this count are treated as Fn blocks.
    int Fn_compute_blocks{8};
    /// Default number of trailing ("Bn") blocks; 0 means there are no Bn blocks.
    int Bn_compute_blocks{0};
    /// A step is eligible for caching when its relative residual difference is below this value.
    float residual_diff_threshold{0.08f};
    /// Number of steps consumed as warm-up before caching is considered.
    int max_warmup_steps{8};
    /// Upper bound on the total number of cached steps; a negative value disables the bound.
    int max_cached_steps{-1};
    /// Upper bound on consecutive cached steps; a negative value disables the bound.
    int max_continuous_cached_steps{-1};
    /// Upper bound on the accumulated residual difference; a negative value disables the bound.
    float max_accumulated_residual_diff{-1.0f};
    /// Optional per-step mask indexed by step: 0 lets the step be cached, any other value forces computation.
    std::vector<int> steps_computation_mask;
    /// When false, a step whose mask entry is 0 is marked cacheable without consulting the other limits.
    bool scm_policy_dynamic{true};
};
|
||||||
|
|
||||||
|
/// Tunables for the TaylorSeer extrapolation policy.
struct TaylorSeerConfig {
    /// Master switch for TaylorSeer.
    bool enabled{false};
    /// Number of finite-difference derivative orders to track.
    int n_derivatives{1};
    /// Steps with an index below this value never use the Taylor approximation.
    int max_warmup_steps{2};
    /// Skip interval: with interval k, steps whose index is a multiple of (k + 1) are computed normally.
    int skip_interval_steps{1};
};
|
||||||
|
|
||||||
|
struct CacheDitConfig {
|
||||||
|
DBCacheConfig dbcache;
|
||||||
|
TaylorSeerConfig taylorseer;
|
||||||
|
int double_Fn_blocks = -1;
|
||||||
|
int double_Bn_blocks = -1;
|
||||||
|
int single_Fn_blocks = -1;
|
||||||
|
int single_Bn_blocks = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Finite-difference Taylor-series extrapolator over a flat float buffer.
///
/// Each call to update_derivatives() records a freshly computed output together
/// with backward-difference estimates of its derivatives; approximate() then
/// extrapolates that most recent snapshot forward to a later step.
struct TaylorSeerState {
    int n_derivatives      = 1;   ///< Highest derivative order tracked.
    int current_step       = -1;  ///< Step index of the latest update.
    int last_computed_step = -1;  ///< Step index of the latest real computation.
    /// Snapshot before the latest one; index 0 is the value, index k the k-th derivative estimate.
    std::vector<std::vector<float>> dY_prev;
    /// Latest snapshot, same layout as dY_prev.
    std::vector<std::vector<float>> dY_current;

    /// Prepares storage for `n_deriv` derivative orders and clears all history.
    /// A negative `n_deriv` is clamped to 0 so the storage size stays valid.
    /// @param hidden_size Unused; buffers are sized on the first update.
    void init(int n_deriv, size_t hidden_size) {
        (void)hidden_size;
        n_derivatives      = std::max(n_deriv, 0);
        const size_t order = static_cast<size_t>(n_derivatives) + 1;
        dY_prev.assign(order, std::vector<float>());
        dY_current.assign(order, std::vector<float>());
        current_step       = -1;
        last_computed_step = -1;
    }

    /// Drops all recorded history while keeping the configured order.
    void reset() {
        for (auto& v : dY_prev)
            v.clear();
        for (auto& v : dY_current)
            v.clear();
        current_step       = -1;
        last_computed_step = -1;
    }

    /// True once at least two snapshots exist and the last computed step index
    /// has reached `n_derivatives`.
    bool can_approximate() const {
        return last_computed_step >= n_derivatives && !dY_prev.empty() && !dY_prev[0].empty();
    }

    /// Records a freshly computed output `Y` (`size` floats) for `step` and
    /// refreshes the derivative estimates against the previous snapshot.
    void update_derivatives(const float* Y, size_t size, int step) {
        // Make sure both histories have one slot per order even if init() was
        // never called or n_derivatives was changed directly.
        const size_t order = static_cast<size_t>(std::max(n_derivatives, 0)) + 1;
        dY_prev.resize(order);
        dY_current.resize(order);

        // The latest snapshot becomes the previous one. Swapping avoids a deep
        // copy; every slot of dY_current is rewritten below.
        dY_prev.swap(dY_current);
        dY_current[0].assign(Y, Y + size);

        int window = step - last_computed_step;
        if (window <= 0)
            window = 1;
        const float span = static_cast<float>(window);

        for (size_t d = 0; d + 1 < order; d++) {
            const std::vector<float>& newer = dY_current[d];
            const std::vector<float>& older = dY_prev[d];
            std::vector<float>& deriv       = dY_current[d + 1];
            // A derivative of order d + 1 needs order-d data of matching size
            // from both snapshots; otherwise it is left empty.
            if (size != 0 && newer.size() == size && older.size() == size) {
                deriv.resize(size);
                for (size_t i = 0; i < size; i++) {
                    deriv[i] = (newer[i] - older[i]) / span;
                }
            } else {
                deriv.clear();
            }
        }

        current_step       = step;
        last_computed_step = step;
    }

    /// Writes the Taylor extrapolation for `target_step` into `output`.
    ///
    /// The series is expanded around the most recent snapshot (dY_current),
    /// which is the one taken at `last_computed_step`, so the elapsed distance
    /// and the expansion point agree. `output` is left untouched when no
    /// approximation is possible or `size` does not match the recorded data.
    void approximate(float* output, size_t size, int target_step) const {
        if (!can_approximate() || dY_current.empty() || dY_current[0].size() != size) {
            return;
        }

        int elapsed = target_step - last_computed_step;
        if (elapsed <= 0)
            elapsed = 1;
        const float dt = static_cast<float>(elapsed);

        std::fill(output, output + size, 0.0f);

        // coeff tracks dt^o / o! and is advanced for every order, so a missing
        // order cannot desynchronise the factorial.
        float coeff = 1.0f;
        for (size_t o = 0; o < dY_current.size(); o++) {
            if (o > 0)
                coeff *= dt / static_cast<float>(o);
            const std::vector<float>& term = dY_current[o];
            if (term.empty() || term.size() != size)
                continue;
            for (size_t i = 0; i < size; i++) {
                output[i] += coeff * term[i];
            }
        }
    }
};
|
||||||
|
|
||||||
|
/// Cached data for one transformer block.
struct BlockCacheEntry {
    std::vector<float> residual_img;  ///< Double block: image output minus image input.
    std::vector<float> residual_txt;  ///< Double block: text output minus text input.
    std::vector<float> residual;      ///< Single block: output minus input.
    std::vector<float> prev_img;      ///< Double block: last stored image output.
    std::vector<float> prev_txt;      ///< Double block: last stored text output.
    std::vector<float> prev_output;   ///< Single block: last stored output.
    bool has_prev{false};             ///< True once a residual has been stored.
};
|
||||||
|
|
||||||
|
struct CacheDitState {
|
||||||
|
CacheDitConfig config;
|
||||||
|
bool initialized = false;
|
||||||
|
|
||||||
|
int total_double_blocks = 0;
|
||||||
|
int total_single_blocks = 0;
|
||||||
|
size_t hidden_size = 0;
|
||||||
|
|
||||||
|
int current_step = -1;
|
||||||
|
int total_steps = 0;
|
||||||
|
int warmup_remaining = 0;
|
||||||
|
std::vector<int> cached_steps;
|
||||||
|
int continuous_cached_steps = 0;
|
||||||
|
float accumulated_residual_diff = 0.0f;
|
||||||
|
|
||||||
|
std::vector<BlockCacheEntry> double_block_cache;
|
||||||
|
std::vector<BlockCacheEntry> single_block_cache;
|
||||||
|
|
||||||
|
std::vector<float> Fn_residual_img;
|
||||||
|
std::vector<float> Fn_residual_txt;
|
||||||
|
std::vector<float> prev_Fn_residual_img;
|
||||||
|
std::vector<float> prev_Fn_residual_txt;
|
||||||
|
bool has_prev_Fn_residual = false;
|
||||||
|
|
||||||
|
std::vector<float> Bn_buffer_img;
|
||||||
|
std::vector<float> Bn_buffer_txt;
|
||||||
|
std::vector<float> Bn_buffer;
|
||||||
|
bool has_Bn_buffer = false;
|
||||||
|
|
||||||
|
TaylorSeerState taylor_state;
|
||||||
|
|
||||||
|
bool can_cache_this_step = false;
|
||||||
|
bool is_caching_this_step = false;
|
||||||
|
|
||||||
|
int total_blocks_computed = 0;
|
||||||
|
int total_blocks_cached = 0;
|
||||||
|
|
||||||
|
void init(const CacheDitConfig& cfg, int num_double_blocks, int num_single_blocks, size_t h_size) {
|
||||||
|
config = cfg;
|
||||||
|
total_double_blocks = num_double_blocks;
|
||||||
|
total_single_blocks = num_single_blocks;
|
||||||
|
hidden_size = h_size;
|
||||||
|
|
||||||
|
initialized = cfg.dbcache.enabled || cfg.taylorseer.enabled;
|
||||||
|
|
||||||
|
if (!initialized)
|
||||||
|
return;
|
||||||
|
|
||||||
|
warmup_remaining = cfg.dbcache.max_warmup_steps;
|
||||||
|
double_block_cache.resize(total_double_blocks);
|
||||||
|
single_block_cache.resize(total_single_blocks);
|
||||||
|
|
||||||
|
if (cfg.taylorseer.enabled) {
|
||||||
|
taylor_state.init(cfg.taylorseer.n_derivatives, h_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
reset_runtime();
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset_runtime() {
|
||||||
|
current_step = -1;
|
||||||
|
total_steps = 0;
|
||||||
|
warmup_remaining = config.dbcache.max_warmup_steps;
|
||||||
|
cached_steps.clear();
|
||||||
|
continuous_cached_steps = 0;
|
||||||
|
accumulated_residual_diff = 0.0f;
|
||||||
|
|
||||||
|
for (auto& entry : double_block_cache) {
|
||||||
|
entry.residual_img.clear();
|
||||||
|
entry.residual_txt.clear();
|
||||||
|
entry.prev_img.clear();
|
||||||
|
entry.prev_txt.clear();
|
||||||
|
entry.has_prev = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& entry : single_block_cache) {
|
||||||
|
entry.residual.clear();
|
||||||
|
entry.prev_output.clear();
|
||||||
|
entry.has_prev = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Fn_residual_img.clear();
|
||||||
|
Fn_residual_txt.clear();
|
||||||
|
prev_Fn_residual_img.clear();
|
||||||
|
prev_Fn_residual_txt.clear();
|
||||||
|
has_prev_Fn_residual = false;
|
||||||
|
|
||||||
|
Bn_buffer_img.clear();
|
||||||
|
Bn_buffer_txt.clear();
|
||||||
|
Bn_buffer.clear();
|
||||||
|
has_Bn_buffer = false;
|
||||||
|
|
||||||
|
taylor_state.reset();
|
||||||
|
|
||||||
|
can_cache_this_step = false;
|
||||||
|
is_caching_this_step = false;
|
||||||
|
|
||||||
|
total_blocks_computed = 0;
|
||||||
|
total_blocks_cached = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool enabled() const {
|
||||||
|
return initialized && (config.dbcache.enabled || config.taylorseer.enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
void begin_step(int step_index, float sigma = 0.0f) {
|
||||||
|
if (!enabled())
|
||||||
|
return;
|
||||||
|
if (step_index == current_step)
|
||||||
|
return;
|
||||||
|
|
||||||
|
current_step = step_index;
|
||||||
|
total_steps++;
|
||||||
|
|
||||||
|
bool in_warmup = warmup_remaining > 0;
|
||||||
|
if (in_warmup) {
|
||||||
|
warmup_remaining--;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool scm_allows_cache = true;
|
||||||
|
if (!config.dbcache.steps_computation_mask.empty()) {
|
||||||
|
if (step_index < static_cast<int>(config.dbcache.steps_computation_mask.size())) {
|
||||||
|
scm_allows_cache = (config.dbcache.steps_computation_mask[step_index] == 0);
|
||||||
|
if (!config.dbcache.scm_policy_dynamic && scm_allows_cache) {
|
||||||
|
can_cache_this_step = true;
|
||||||
|
is_caching_this_step = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool max_cached_ok = (config.dbcache.max_cached_steps < 0) ||
|
||||||
|
(static_cast<int>(cached_steps.size()) < config.dbcache.max_cached_steps);
|
||||||
|
|
||||||
|
bool max_cont_ok = (config.dbcache.max_continuous_cached_steps < 0) ||
|
||||||
|
(continuous_cached_steps < config.dbcache.max_continuous_cached_steps);
|
||||||
|
|
||||||
|
bool accum_ok = (config.dbcache.max_accumulated_residual_diff < 0.0f) ||
|
||||||
|
(accumulated_residual_diff < config.dbcache.max_accumulated_residual_diff);
|
||||||
|
|
||||||
|
can_cache_this_step = !in_warmup && scm_allows_cache && max_cached_ok && max_cont_ok && accum_ok && has_prev_Fn_residual;
|
||||||
|
is_caching_this_step = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void end_step(bool was_cached) {
|
||||||
|
if (was_cached) {
|
||||||
|
cached_steps.push_back(current_step);
|
||||||
|
continuous_cached_steps++;
|
||||||
|
} else {
|
||||||
|
continuous_cached_steps = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static float calculate_residual_diff(const float* prev, const float* curr, size_t size) {
|
||||||
|
if (size == 0)
|
||||||
|
return 0.0f;
|
||||||
|
|
||||||
|
float sum_diff = 0.0f;
|
||||||
|
float sum_abs = 0.0f;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
sum_diff += std::fabs(prev[i] - curr[i]);
|
||||||
|
sum_abs += std::fabs(prev[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum_diff / (sum_abs + 1e-6f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float calculate_residual_diff(const std::vector<float>& prev, const std::vector<float>& curr) {
|
||||||
|
if (prev.size() != curr.size() || prev.empty())
|
||||||
|
return 1.0f;
|
||||||
|
return calculate_residual_diff(prev.data(), curr.data(), prev.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_double_Fn_blocks() const {
|
||||||
|
return (config.double_Fn_blocks >= 0) ? config.double_Fn_blocks : config.dbcache.Fn_compute_blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_double_Bn_blocks() const {
|
||||||
|
return (config.double_Bn_blocks >= 0) ? config.double_Bn_blocks : config.dbcache.Bn_compute_blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_single_Fn_blocks() const {
|
||||||
|
return (config.single_Fn_blocks >= 0) ? config.single_Fn_blocks : config.dbcache.Fn_compute_blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
int get_single_Bn_blocks() const {
|
||||||
|
return (config.single_Bn_blocks >= 0) ? config.single_Bn_blocks : config.dbcache.Bn_compute_blocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_Fn_double_block(int block_idx) const {
|
||||||
|
return block_idx < get_double_Fn_blocks();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_Bn_double_block(int block_idx) const {
|
||||||
|
int Bn = get_double_Bn_blocks();
|
||||||
|
return Bn > 0 && block_idx >= (total_double_blocks - Bn);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_Mn_double_block(int block_idx) const {
|
||||||
|
return !is_Fn_double_block(block_idx) && !is_Bn_double_block(block_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_Fn_single_block(int block_idx) const {
|
||||||
|
return block_idx < get_single_Fn_blocks();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_Bn_single_block(int block_idx) const {
|
||||||
|
int Bn = get_single_Bn_blocks();
|
||||||
|
return Bn > 0 && block_idx >= (total_single_blocks - Bn);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_Mn_single_block(int block_idx) const {
|
||||||
|
return !is_Fn_single_block(block_idx) && !is_Bn_single_block(block_idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_Fn_residual(const float* img, const float* txt, size_t img_size, size_t txt_size, const float* input_img, const float* input_txt) {
|
||||||
|
Fn_residual_img.resize(img_size);
|
||||||
|
Fn_residual_txt.resize(txt_size);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < img_size; i++) {
|
||||||
|
Fn_residual_img[i] = img[i] - input_img[i];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < txt_size; i++) {
|
||||||
|
Fn_residual_txt[i] = txt[i] - input_txt[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool check_cache_decision() {
|
||||||
|
if (!can_cache_this_step) {
|
||||||
|
is_caching_this_step = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!has_prev_Fn_residual || prev_Fn_residual_img.empty()) {
|
||||||
|
is_caching_this_step = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
float diff_img = calculate_residual_diff(prev_Fn_residual_img, Fn_residual_img);
|
||||||
|
float diff_txt = calculate_residual_diff(prev_Fn_residual_txt, Fn_residual_txt);
|
||||||
|
float diff = (diff_img + diff_txt) / 2.0f;
|
||||||
|
|
||||||
|
if (diff < config.dbcache.residual_diff_threshold) {
|
||||||
|
is_caching_this_step = true;
|
||||||
|
accumulated_residual_diff += diff;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
is_caching_this_step = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_prev_Fn_residual() {
|
||||||
|
prev_Fn_residual_img = Fn_residual_img;
|
||||||
|
prev_Fn_residual_txt = Fn_residual_txt;
|
||||||
|
has_prev_Fn_residual = !prev_Fn_residual_img.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_double_block_residual(int block_idx, const float* img, const float* txt, size_t img_size, size_t txt_size, const float* prev_img, const float* prev_txt) {
|
||||||
|
if (block_idx < 0 || block_idx >= static_cast<int>(double_block_cache.size()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
BlockCacheEntry& entry = double_block_cache[block_idx];
|
||||||
|
|
||||||
|
entry.residual_img.resize(img_size);
|
||||||
|
entry.residual_txt.resize(txt_size);
|
||||||
|
for (size_t i = 0; i < img_size; i++) {
|
||||||
|
entry.residual_img[i] = img[i] - prev_img[i];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < txt_size; i++) {
|
||||||
|
entry.residual_txt[i] = txt[i] - prev_txt[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.prev_img.resize(img_size);
|
||||||
|
entry.prev_txt.resize(txt_size);
|
||||||
|
for (size_t i = 0; i < img_size; i++) {
|
||||||
|
entry.prev_img[i] = img[i];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < txt_size; i++) {
|
||||||
|
entry.prev_txt[i] = txt[i];
|
||||||
|
}
|
||||||
|
entry.has_prev = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void apply_double_block_cache(int block_idx, float* img, float* txt, size_t img_size, size_t txt_size) {
|
||||||
|
if (block_idx < 0 || block_idx >= static_cast<int>(double_block_cache.size()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
const BlockCacheEntry& entry = double_block_cache[block_idx];
|
||||||
|
if (entry.residual_img.size() != img_size || entry.residual_txt.size() != txt_size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < img_size; i++) {
|
||||||
|
img[i] += entry.residual_img[i];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < txt_size; i++) {
|
||||||
|
txt[i] += entry.residual_txt[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
total_blocks_cached++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_single_block_residual(int block_idx, const float* output, size_t size, const float* input) {
|
||||||
|
if (block_idx < 0 || block_idx >= static_cast<int>(single_block_cache.size()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
BlockCacheEntry& entry = single_block_cache[block_idx];
|
||||||
|
|
||||||
|
entry.residual.resize(size);
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
entry.residual[i] = output[i] - input[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.prev_output.resize(size);
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
entry.prev_output[i] = output[i];
|
||||||
|
}
|
||||||
|
entry.has_prev = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void apply_single_block_cache(int block_idx, float* output, size_t size) {
|
||||||
|
if (block_idx < 0 || block_idx >= static_cast<int>(single_block_cache.size()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
const BlockCacheEntry& entry = single_block_cache[block_idx];
|
||||||
|
if (entry.residual.size() != size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
output[i] += entry.residual[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
total_blocks_cached++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void store_Bn_buffer(const float* img, const float* txt, size_t img_size, size_t txt_size, const float* Bn_start_img, const float* Bn_start_txt) {
|
||||||
|
Bn_buffer_img.resize(img_size);
|
||||||
|
Bn_buffer_txt.resize(txt_size);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < img_size; i++) {
|
||||||
|
Bn_buffer_img[i] = img[i] - Bn_start_img[i];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < txt_size; i++) {
|
||||||
|
Bn_buffer_txt[i] = txt[i] - Bn_start_txt[i];
|
||||||
|
}
|
||||||
|
has_Bn_buffer = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void apply_Bn_buffer(float* img, float* txt, size_t img_size, size_t txt_size) {
|
||||||
|
if (!has_Bn_buffer)
|
||||||
|
return;
|
||||||
|
if (Bn_buffer_img.size() != img_size || Bn_buffer_txt.size() != txt_size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < img_size; i++) {
|
||||||
|
img[i] += Bn_buffer_img[i];
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < txt_size; i++) {
|
||||||
|
txt[i] += Bn_buffer_txt[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void taylor_update(const float* hidden_state, size_t size) {
|
||||||
|
if (!config.taylorseer.enabled)
|
||||||
|
return;
|
||||||
|
taylor_state.update_derivatives(hidden_state, size, current_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool taylor_can_approximate() const {
|
||||||
|
return config.taylorseer.enabled && taylor_state.can_approximate();
|
||||||
|
}
|
||||||
|
|
||||||
|
void taylor_approximate(float* output, size_t size) {
|
||||||
|
if (!config.taylorseer.enabled)
|
||||||
|
return;
|
||||||
|
taylor_state.approximate(output, size, current_step);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool should_use_taylor_this_step() const {
|
||||||
|
if (!config.taylorseer.enabled)
|
||||||
|
return false;
|
||||||
|
if (current_step < config.taylorseer.max_warmup_steps)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int interval = config.taylorseer.skip_interval_steps;
|
||||||
|
if (interval <= 0)
|
||||||
|
interval = 1;
|
||||||
|
|
||||||
|
return (current_step % (interval + 1)) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void log_metrics() const {
|
||||||
|
if (!enabled())
|
||||||
|
return;
|
||||||
|
|
||||||
|
int total_blocks = total_blocks_computed + total_blocks_cached;
|
||||||
|
float cache_ratio = (total_blocks > 0) ? (static_cast<float>(total_blocks_cached) / total_blocks * 100.0f) : 0.0f;
|
||||||
|
|
||||||
|
float step_cache_ratio = (total_steps > 0) ? (static_cast<float>(cached_steps.size()) / total_steps * 100.0f) : 0.0f;
|
||||||
|
|
||||||
|
LOG_INFO("CacheDIT: steps_cached=%zu/%d (%.1f%%), blocks_cached=%d/%d (%.1f%%), accum_diff=%.4f",
|
||||||
|
cached_steps.size(), total_steps, step_cache_ratio,
|
||||||
|
total_blocks_cached, total_blocks, cache_ratio,
|
||||||
|
accumulated_residual_diff);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_summary() const {
|
||||||
|
char buf[256];
|
||||||
|
snprintf(buf, sizeof(buf),
|
||||||
|
"CacheDIT[thresh=%.2f]: cached %zu/%d steps, %d/%d blocks",
|
||||||
|
config.dbcache.residual_diff_threshold,
|
||||||
|
cached_steps.size(), total_steps,
|
||||||
|
total_blocks_cached, total_blocks_computed + total_blocks_cached);
|
||||||
|
return std::string(buf);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Parses a comma-separated list of integers (for example "1,0,0,1") into a
/// step computation mask. An empty string yields an empty mask and a trailing
/// comma is tolerated. Each token is converted with std::stoi, which throws
/// on tokens it cannot parse.
inline std::vector<int> parse_scm_mask(const std::string& mask_str) {
    std::vector<int> steps;
    const size_t length = mask_str.length();

    size_t begin = 0;
    while (begin < length) {
        const size_t comma = mask_str.find(',', begin);
        if (comma == std::string::npos) {
            // Last token: everything up to the end of the string.
            steps.push_back(std::stoi(mask_str.substr(begin)));
            break;
        }
        steps.push_back(std::stoi(mask_str.substr(begin, comma - begin)));
        begin = comma + 1;
    }

    return steps;
}
|
||||||
|
|
||||||
|
/// Builds a step computation mask by alternating runs of 1 (compute) and 0
/// (cache): compute_bins[0] ones, cache_bins[0] zeros, compute_bins[1] ones
/// and so on. Generation stops at `total_steps` entries or when both bin
/// lists are used up, so the mask may be shorter than `total_steps`.
/// The final entry of a non-empty mask is always forced to 1.
inline std::vector<int> generate_scm_mask(
    const std::vector<int>& compute_bins,
    const std::vector<int>& cache_bins,
    int total_steps) {
    std::vector<int> mask;

    // Appends up to `run_length` copies of `value` without exceeding total_steps.
    auto append_run = [&mask, total_steps](int run_length, int value) {
        const int room = total_steps - static_cast<int>(mask.size());
        const int take = std::min(run_length, room);
        if (take > 0) {
            mask.insert(mask.end(), static_cast<size_t>(take), value);
        }
    };

    size_t next_compute = 0;
    size_t next_cache   = 0;
    while (static_cast<int>(mask.size()) < total_steps) {
        if (next_compute < compute_bins.size()) {
            append_run(compute_bins[next_compute++], 1);
        }
        if (next_cache < cache_bins.size()) {
            append_run(cache_bins[next_cache++], 0);
        }
        const bool bins_exhausted = next_compute >= compute_bins.size() && next_cache >= cache_bins.size();
        if (bins_exhausted)
            break;
    }

    if (!mask.empty()) {
        mask.back() = 1;
    }

    return mask;
}
|
||||||
|
|
||||||
|
inline void parse_dbcache_options(const std::string& opts, DBCacheConfig& cfg) {
|
||||||
|
if (opts.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
int Fn = 8, Bn = 0, warmup = 8, max_cached = -1, max_cont = -1;
|
||||||
|
float thresh = 0.08f;
|
||||||
|
|
||||||
|
sscanf(opts.c_str(), "%d,%d,%f,%d,%d,%d",
|
||||||
|
&Fn, &Bn, &thresh, &warmup, &max_cached, &max_cont);
|
||||||
|
|
||||||
|
cfg.Fn_compute_blocks = Fn;
|
||||||
|
cfg.Bn_compute_blocks = Bn;
|
||||||
|
cfg.residual_diff_threshold = thresh;
|
||||||
|
cfg.max_warmup_steps = warmup;
|
||||||
|
cfg.max_cached_steps = max_cached;
|
||||||
|
cfg.max_continuous_cached_steps = max_cont;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses "n_derivatives,warmup,interval" into `cfg`.
/// An empty string leaves `cfg` untouched. Otherwise the three fields are
/// first set to their built-in defaults (1, 2, 1) and then overwritten by as
/// many leading values as the string supplies.
inline void parse_taylorseer_options(const std::string& opts, TaylorSeerConfig& cfg) {
    if (opts.empty())
        return;

    // Fields missing from the option string end up with these values.
    cfg.n_derivatives       = 1;
    cfg.max_warmup_steps    = 2;
    cfg.skip_interval_steps = 1;

    sscanf(opts.c_str(), "%d,%d,%d",
           &cfg.n_derivatives,
           &cfg.max_warmup_steps,
           &cfg.skip_interval_steps);
}
|
||||||
|
|
||||||
|
struct CacheDitConditionState {
|
||||||
|
DBCacheConfig config;
|
||||||
|
TaylorSeerConfig taylor_config;
|
||||||
|
bool initialized = false;
|
||||||
|
|
||||||
|
int current_step_index = -1;
|
||||||
|
bool step_active = false;
|
||||||
|
bool skip_current_step = false;
|
||||||
|
bool initial_step = true;
|
||||||
|
int warmup_remaining = 0;
|
||||||
|
std::vector<int> cached_steps;
|
||||||
|
int continuous_cached_steps = 0;
|
||||||
|
float accumulated_residual_diff = 0.0f;
|
||||||
|
int total_steps_skipped = 0;
|
||||||
|
|
||||||
|
const void* anchor_condition = nullptr;
|
||||||
|
|
||||||
|
struct CacheEntry {
|
||||||
|
std::vector<float> diff;
|
||||||
|
std::vector<float> prev_input;
|
||||||
|
std::vector<float> prev_output;
|
||||||
|
bool has_prev = false;
|
||||||
|
};
|
||||||
|
std::unordered_map<const void*, CacheEntry> cache_diffs;
|
||||||
|
|
||||||
|
TaylorSeerState taylor_state;
|
||||||
|
|
||||||
|
float start_sigma = std::numeric_limits<float>::max();
|
||||||
|
float end_sigma = 0.0f;
|
||||||
|
|
||||||
|
void reset_runtime() {
|
||||||
|
current_step_index = -1;
|
||||||
|
step_active = false;
|
||||||
|
skip_current_step = false;
|
||||||
|
initial_step = true;
|
||||||
|
warmup_remaining = config.max_warmup_steps;
|
||||||
|
cached_steps.clear();
|
||||||
|
continuous_cached_steps = 0;
|
||||||
|
accumulated_residual_diff = 0.0f;
|
||||||
|
total_steps_skipped = 0;
|
||||||
|
anchor_condition = nullptr;
|
||||||
|
cache_diffs.clear();
|
||||||
|
taylor_state.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(const DBCacheConfig& dbcfg, const TaylorSeerConfig& tcfg) {
|
||||||
|
config = dbcfg;
|
||||||
|
taylor_config = tcfg;
|
||||||
|
initialized = dbcfg.enabled || tcfg.enabled;
|
||||||
|
reset_runtime();
|
||||||
|
|
||||||
|
if (taylor_config.enabled) {
|
||||||
|
taylor_state.init(taylor_config.n_derivatives, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_sigmas(const std::vector<float>& sigmas) {
|
||||||
|
if (!initialized || sigmas.size() < 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
float start_percent = 0.15f;
|
||||||
|
float end_percent = 0.95f;
|
||||||
|
|
||||||
|
size_t n_steps = sigmas.size() - 1;
|
||||||
|
size_t start_step = static_cast<size_t>(start_percent * n_steps);
|
||||||
|
size_t end_step = static_cast<size_t>(end_percent * n_steps);
|
||||||
|
|
||||||
|
if (start_step >= n_steps)
|
||||||
|
start_step = n_steps - 1;
|
||||||
|
if (end_step >= n_steps)
|
||||||
|
end_step = n_steps - 1;
|
||||||
|
|
||||||
|
start_sigma = sigmas[start_step];
|
||||||
|
end_sigma = sigmas[end_step];
|
||||||
|
|
||||||
|
if (start_sigma < end_sigma) {
|
||||||
|
std::swap(start_sigma, end_sigma);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool enabled() const {
|
||||||
|
return initialized && (config.enabled || taylor_config.enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
void begin_step(int step_index, float sigma) {
|
||||||
|
if (!enabled())
|
||||||
|
return;
|
||||||
|
if (step_index == current_step_index)
|
||||||
|
return;
|
||||||
|
|
||||||
|
current_step_index = step_index;
|
||||||
|
skip_current_step = false;
|
||||||
|
step_active = false;
|
||||||
|
|
||||||
|
if (sigma > start_sigma)
|
||||||
|
return;
|
||||||
|
if (!(sigma > end_sigma))
|
||||||
|
return;
|
||||||
|
|
||||||
|
step_active = true;
|
||||||
|
|
||||||
|
if (warmup_remaining > 0) {
|
||||||
|
warmup_remaining--;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!config.steps_computation_mask.empty()) {
|
||||||
|
if (step_index < static_cast<int>(config.steps_computation_mask.size())) {
|
||||||
|
if (config.steps_computation_mask[step_index] == 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config.max_cached_steps >= 0 &&
|
||||||
|
static_cast<int>(cached_steps.size()) >= config.max_cached_steps) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (config.max_continuous_cached_steps >= 0 &&
|
||||||
|
continuous_cached_steps >= config.max_continuous_cached_steps) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool step_is_active() const {
|
||||||
|
return enabled() && step_active;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_step_skipped() const {
|
||||||
|
return enabled() && step_active && skip_current_step;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_cache(const void* cond) const {
|
||||||
|
auto it = cache_diffs.find(cond);
|
||||||
|
return it != cache_diffs.end() && !it->second.diff.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_cache(const void* cond, const float* input, const float* output, size_t size) {
|
||||||
|
CacheEntry& entry = cache_diffs[cond];
|
||||||
|
entry.diff.resize(size);
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
entry.diff[i] = output[i] - input[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.prev_input.resize(size);
|
||||||
|
entry.prev_output.resize(size);
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
entry.prev_input[i] = input[i];
|
||||||
|
entry.prev_output[i] = output[i];
|
||||||
|
}
|
||||||
|
entry.has_prev = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void apply_cache(const void* cond, const float* input, float* output, size_t size) {
|
||||||
|
auto it = cache_diffs.find(cond);
|
||||||
|
if (it == cache_diffs.end() || it->second.diff.empty())
|
||||||
|
return;
|
||||||
|
if (it->second.diff.size() != size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; i++) {
|
||||||
|
output[i] = input[i] + it->second.diff[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool before_condition(const void* cond, ggml_tensor* input, ggml_tensor* output, float sigma, int step_index) {
|
||||||
|
if (!enabled() || step_index < 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (step_index != current_step_index) {
|
||||||
|
begin_step(step_index, sigma);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!step_active)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (initial_step) {
|
||||||
|
anchor_condition = cond;
|
||||||
|
initial_step = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_anchor = (cond == anchor_condition);
|
||||||
|
|
||||||
|
if (skip_current_step) {
|
||||||
|
if (has_cache(cond)) {
|
||||||
|
apply_cache(cond, (float*)input->data, (float*)output->data,
|
||||||
|
static_cast<size_t>(ggml_nelements(output)));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_anchor)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auto it = cache_diffs.find(cond);
|
||||||
|
if (it == cache_diffs.end() || !it->second.has_prev)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
size_t ne = static_cast<size_t>(ggml_nelements(input));
|
||||||
|
if (it->second.prev_input.size() != ne)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
float* input_data = (float*)input->data;
|
||||||
|
float diff = CacheDitState::calculate_residual_diff(
|
||||||
|
it->second.prev_input.data(), input_data, ne);
|
||||||
|
|
||||||
|
float effective_threshold = config.residual_diff_threshold;
|
||||||
|
if (config.Fn_compute_blocks > 0) {
|
||||||
|
float fn_confidence = 1.0f + 0.02f * (config.Fn_compute_blocks - 8);
|
||||||
|
fn_confidence = std::max(0.5f, std::min(2.0f, fn_confidence));
|
||||||
|
effective_threshold *= fn_confidence;
|
||||||
|
}
|
||||||
|
if (config.Bn_compute_blocks > 0) {
|
||||||
|
float bn_quality = 1.0f - 0.03f * config.Bn_compute_blocks;
|
||||||
|
bn_quality = std::max(0.5f, std::min(1.0f, bn_quality));
|
||||||
|
effective_threshold *= bn_quality;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (diff < effective_threshold) {
|
||||||
|
skip_current_step = true;
|
||||||
|
total_steps_skipped++;
|
||||||
|
cached_steps.push_back(current_step_index);
|
||||||
|
continuous_cached_steps++;
|
||||||
|
accumulated_residual_diff += diff;
|
||||||
|
apply_cache(cond, input_data, (float*)output->data, ne);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
continuous_cached_steps = 0;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void after_condition(const void* cond, ggml_tensor* input, ggml_tensor* output) {
|
||||||
|
if (!step_is_active())
|
||||||
|
return;
|
||||||
|
|
||||||
|
size_t ne = static_cast<size_t>(ggml_nelements(output));
|
||||||
|
update_cache(cond, (float*)input->data, (float*)output->data, ne);
|
||||||
|
|
||||||
|
if (cond == anchor_condition && taylor_config.enabled) {
|
||||||
|
taylor_state.update_derivatives((float*)output->data, ne, current_step_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void log_metrics() const {
|
||||||
|
if (!enabled())
|
||||||
|
return;
|
||||||
|
|
||||||
|
LOG_INFO("CacheDIT: steps_skipped=%d/%d (%.1f%%), accum_residual_diff=%.4f",
|
||||||
|
total_steps_skipped,
|
||||||
|
current_step_index + 1,
|
||||||
|
(current_step_index > 0) ? (100.0f * total_steps_skipped / (current_step_index + 1)) : 0.0f,
|
||||||
|
accumulated_residual_diff);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -3,34 +3,11 @@
|
|||||||
|
|
||||||
#include "ggml_extend.hpp"
|
#include "ggml_extend.hpp"
|
||||||
#include "model.h"
|
#include "model.h"
|
||||||
|
#include "tokenize_util.h"
|
||||||
|
#include "vocab/vocab.h"
|
||||||
|
|
||||||
/*================================================== CLIPTokenizer ===================================================*/
|
/*================================================== CLIPTokenizer ===================================================*/
|
||||||
|
|
||||||
/**
 * Strips every `<lora:NAME:MULTIPLIER>` tag from a prompt and collects the
 * multipliers that were requested.
 *
 * @param text  Prompt text; taken by value and edited locally.
 * @return      A pair of (NAME -> summed multiplier, prompt with all tags removed).
 *              Tags whose multiplier parses to 0 are removed from the text but
 *              not recorded. Repeated names have their multipliers added together.
 *
 * The multiplier is parsed with std::stof, so a tag whose multiplier is not a
 * number (or is out of float range) makes this function throw
 * std::invalid_argument / std::out_of_range.
 */
__STATIC_INLINE__ std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remove_lora(std::string text) {
    // The pattern is constant, so compile it once instead of on every call.
    static const std::regex re("<lora:([^:]+):([^>]+)>");
    std::smatch matches;
    std::unordered_map<std::string, float> filename2multiplier;

    while (std::regex_search(text, matches, re)) {
        // Copy the captures out first: `matches` points into the buffer that
        // the assignment to `text` below replaces.
        std::string filename = matches[1].str();
        float multiplier     = std::stof(matches[2].str());

        // Drop only the tag that was just parsed (the leftmost match).
        text = std::regex_replace(text, re, "", std::regex_constants::format_first_only);

        if (multiplier == 0.f) {
            continue;
        }

        // operator[] value-initialises a missing entry to 0, so this both
        // inserts new names and accumulates repeated ones.
        filename2multiplier[filename] += multiplier;
    }

    // Move the locals into the result instead of copying the map and string.
    return std::make_pair(std::move(filename2multiplier), std::move(text));
}
|
|
||||||
|
|
||||||
__STATIC_INLINE__ std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
|
__STATIC_INLINE__ std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
|
||||||
std::vector<std::pair<int, std::u32string>> byte_unicode_pairs;
|
std::vector<std::pair<int, std::u32string>> byte_unicode_pairs;
|
||||||
std::set<int> byte_set;
|
std::set<int> byte_set;
|
||||||
@ -72,6 +49,8 @@ private:
|
|||||||
int encoder_len;
|
int encoder_len;
|
||||||
int bpe_len;
|
int bpe_len;
|
||||||
|
|
||||||
|
std::vector<std::string> special_tokens;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
const std::string UNK_TOKEN = "<|endoftext|>";
|
const std::string UNK_TOKEN = "<|endoftext|>";
|
||||||
const std::string BOS_TOKEN = "<|startoftext|>";
|
const std::string BOS_TOKEN = "<|startoftext|>";
|
||||||
@ -117,14 +96,25 @@ private:
|
|||||||
return pairs;
|
return pairs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_special_token(const std::string& token) {
|
||||||
|
for (auto& special_token : special_tokens) {
|
||||||
|
if (special_token == token) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CLIPTokenizer(int pad_token_id = 49407, const std::string& merges_utf8_str = "")
|
CLIPTokenizer(int pad_token_id = 49407, const std::string& merges_utf8_str = "")
|
||||||
: PAD_TOKEN_ID(pad_token_id) {
|
: PAD_TOKEN_ID(pad_token_id) {
|
||||||
if (merges_utf8_str.size() > 0) {
|
if (merges_utf8_str.size() > 0) {
|
||||||
load_from_merges(merges_utf8_str);
|
load_from_merges(merges_utf8_str);
|
||||||
} else {
|
} else {
|
||||||
load_from_merges(ModelLoader::load_merges());
|
load_from_merges(load_clip_merges());
|
||||||
}
|
}
|
||||||
|
add_special_token("<|startoftext|>");
|
||||||
|
add_special_token("<|endoftext|>");
|
||||||
}
|
}
|
||||||
|
|
||||||
void load_from_merges(const std::string& merges_utf8_str) {
|
void load_from_merges(const std::string& merges_utf8_str) {
|
||||||
@ -201,6 +191,10 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends `token` to `special_tokens`. No de-duplication is performed.
void add_special_token(const std::string& token) {
    special_tokens.emplace_back(token);
}
|
||||||
|
|
||||||
std::u32string bpe(const std::u32string& token) {
|
std::u32string bpe(const std::u32string& token) {
|
||||||
std::vector<std::u32string> word;
|
std::vector<std::u32string> word;
|
||||||
|
|
||||||
@ -303,7 +297,7 @@ public:
|
|||||||
size_t max_length = 0,
|
size_t max_length = 0,
|
||||||
bool padding = false) {
|
bool padding = false) {
|
||||||
if (max_length > 0 && padding) {
|
if (max_length > 0 && padding) {
|
||||||
size_t n = std::ceil(tokens.size() * 1.0 / (max_length - 2));
|
size_t n = static_cast<size_t>(std::ceil(tokens.size() * 1.0 / (max_length - 2)));
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
n = 1;
|
n = 1;
|
||||||
}
|
}
|
||||||
@ -379,25 +373,54 @@ public:
|
|||||||
return trim(text);
|
return trim(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Splits `text` into the pieces matched by the tokenizer pattern, in order:
// the contractions 's 't 're 've 'm 'll 'd, runs of letters, single digits,
// and runs of characters that are neither whitespace, letters nor digits.
// Matching is case-insensitive; whitespace between matches is discarded.
// Returns an empty vector when nothing matches.
std::vector<std::string> token_split(const std::string& text) {
    // The pattern never changes, so build the regex once rather than on every
    // call (this is invoked once per text segment).
    static const std::regex pat(R"('s|'t|'re|'ve|'m|'ll|'d|[[:alpha:]]+|[[:digit:]]|[^[:space:][:alpha:][:digit:]]+)",
                                std::regex::icase);

    std::vector<std::string> result;
    const std::sregex_iterator end;
    for (std::sregex_iterator iter(text.begin(), text.end(), pat); iter != end; ++iter) {
        result.emplace_back(iter->str());
    }

    return result;
}
|
||||||
|
|
||||||
std::vector<int> encode(std::string text, on_new_token_cb_t on_new_token_cb) {
|
std::vector<int> encode(std::string text, on_new_token_cb_t on_new_token_cb) {
|
||||||
std::string original_text = text;
|
std::string original_text = text;
|
||||||
std::vector<int32_t> bpe_tokens;
|
std::vector<int32_t> bpe_tokens;
|
||||||
text = whitespace_clean(text);
|
text = whitespace_clean(text);
|
||||||
std::transform(text.begin(), text.end(), text.begin(), [](unsigned char c) { return std::tolower(c); });
|
std::transform(text.begin(), text.end(), text.begin(), [](unsigned char c) { return std::tolower(c); });
|
||||||
|
|
||||||
std::regex pat(R"(<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[[:alpha:]]+|[[:digit:]]|[^[:space:][:alpha:][:digit:]]+)",
|
|
||||||
std::regex::icase);
|
|
||||||
|
|
||||||
std::smatch matches;
|
|
||||||
std::string str = text;
|
std::string str = text;
|
||||||
std::vector<std::string> token_strs;
|
std::vector<std::string> token_strs;
|
||||||
while (std::regex_search(str, matches, pat)) {
|
|
||||||
bool skip = on_new_token_cb(str, bpe_tokens);
|
auto splited_texts = split_with_special_tokens(text, special_tokens);
|
||||||
|
|
||||||
|
for (auto& splited_text : splited_texts) {
|
||||||
|
LOG_DEBUG("token %s", splited_text.c_str());
|
||||||
|
if (is_special_token(splited_text)) {
|
||||||
|
LOG_DEBUG("special %s", splited_text.c_str());
|
||||||
|
bool skip = on_new_token_cb(splited_text, bpe_tokens);
|
||||||
if (skip) {
|
if (skip) {
|
||||||
|
token_strs.push_back(splited_text);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (auto& token : matches) {
|
continue;
|
||||||
std::string token_str = token.str();
|
}
|
||||||
|
|
||||||
|
auto tokens = token_split(splited_text);
|
||||||
|
for (auto& token : tokens) {
|
||||||
|
if (on_new_token_cb != nullptr) {
|
||||||
|
bool skip = on_new_token_cb(token, bpe_tokens);
|
||||||
|
if (skip) {
|
||||||
|
token_strs.push_back(token);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string token_str = token;
|
||||||
std::u32string utf32_token;
|
std::u32string utf32_token;
|
||||||
for (int i = 0; i < token_str.length(); i++) {
|
for (int i = 0; i < token_str.length(); i++) {
|
||||||
unsigned char b = token_str[i];
|
unsigned char b = token_str[i];
|
||||||
@ -417,14 +440,13 @@ public:
|
|||||||
bpe_tokens.push_back(encoder[bpe_str]);
|
bpe_tokens.push_back(encoder[bpe_str]);
|
||||||
token_strs.push_back(utf32_to_utf8(bpe_str));
|
token_strs.push_back(utf32_to_utf8(bpe_str));
|
||||||
}
|
}
|
||||||
str = matches.suffix();
|
|
||||||
}
|
}
|
||||||
std::stringstream ss;
|
// std::stringstream ss;
|
||||||
ss << "[";
|
// ss << "[";
|
||||||
for (auto token : token_strs) {
|
// for (auto token : token_strs) {
|
||||||
ss << "\"" << token << "\", ";
|
// ss << "\"" << token << "\", ";
|
||||||
}
|
// }
|
||||||
ss << "]";
|
// ss << "]";
|
||||||
// LOG_DEBUG("split prompt \"%s\" to tokens %s", original_text.c_str(), ss.str().c_str());
|
// LOG_DEBUG("split prompt \"%s\" to tokens %s", original_text.c_str(), ss.str().c_str());
|
||||||
// printf("split prompt \"%s\" to tokens %s \n", original_text.c_str(), ss.str().c_str());
|
// printf("split prompt \"%s\" to tokens %s \n", original_text.c_str(), ss.str().c_str());
|
||||||
return bpe_tokens;
|
return bpe_tokens;
|
||||||
@ -451,16 +473,16 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, n_token, d_model]
|
// x: [N, n_token, d_model]
|
||||||
auto fc1 = std::dynamic_pointer_cast<Linear>(blocks["fc1"]);
|
auto fc1 = std::dynamic_pointer_cast<Linear>(blocks["fc1"]);
|
||||||
auto fc2 = std::dynamic_pointer_cast<Linear>(blocks["fc2"]);
|
auto fc2 = std::dynamic_pointer_cast<Linear>(blocks["fc2"]);
|
||||||
|
|
||||||
x = fc1->forward(ctx, x);
|
x = fc1->forward(ctx, x);
|
||||||
if (use_gelu) {
|
if (use_gelu) {
|
||||||
x = ggml_gelu_inplace(ctx->ggml_ctx, x);
|
x = ggml_ext_gelu(ctx->ggml_ctx, x, true);
|
||||||
} else {
|
} else {
|
||||||
x = ggml_gelu_quick_inplace(ctx->ggml_ctx, x);
|
x = ggml_ext_gelu_quick(ctx->ggml_ctx, x, true);
|
||||||
}
|
}
|
||||||
x = fc2->forward(ctx, x);
|
x = fc2->forward(ctx, x);
|
||||||
return x;
|
return x;
|
||||||
@ -489,7 +511,7 @@ public:
|
|||||||
blocks["mlp"] = std::shared_ptr<GGMLBlock>(new CLIPMLP(d_model, intermediate_size));
|
blocks["mlp"] = std::shared_ptr<GGMLBlock>(new CLIPMLP(d_model, intermediate_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x, bool mask = true) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x, ggml_tensor* mask = nullptr) {
|
||||||
// x: [N, n_token, d_model]
|
// x: [N, n_token, d_model]
|
||||||
auto self_attn = std::dynamic_pointer_cast<MultiheadAttention>(blocks["self_attn"]);
|
auto self_attn = std::dynamic_pointer_cast<MultiheadAttention>(blocks["self_attn"]);
|
||||||
auto layer_norm1 = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm1"]);
|
auto layer_norm1 = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm1"]);
|
||||||
@ -504,10 +526,10 @@ public:
|
|||||||
|
|
||||||
struct CLIPEncoder : public GGMLBlock {
|
struct CLIPEncoder : public GGMLBlock {
|
||||||
protected:
|
protected:
|
||||||
int64_t n_layer;
|
int n_layer;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CLIPEncoder(int64_t n_layer,
|
CLIPEncoder(int n_layer,
|
||||||
int64_t d_model,
|
int64_t d_model,
|
||||||
int64_t n_head,
|
int64_t n_head,
|
||||||
int64_t intermediate_size,
|
int64_t intermediate_size,
|
||||||
@ -519,10 +541,10 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
int clip_skip = -1,
|
ggml_tensor* mask = nullptr,
|
||||||
bool mask = true) {
|
int clip_skip = -1) {
|
||||||
// x: [N, n_token, d_model]
|
// x: [N, n_token, d_model]
|
||||||
int layer_idx = n_layer - 1;
|
int layer_idx = n_layer - 1;
|
||||||
// LOG_DEBUG("clip_skip %d", clip_skip);
|
// LOG_DEBUG("clip_skip %d", clip_skip);
|
||||||
@ -551,7 +573,7 @@ protected:
|
|||||||
int64_t num_positions;
|
int64_t num_positions;
|
||||||
bool force_clip_f32;
|
bool force_clip_f32;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
||||||
enum ggml_type token_wtype = GGML_TYPE_F32;
|
enum ggml_type token_wtype = GGML_TYPE_F32;
|
||||||
if (!force_clip_f32) {
|
if (!force_clip_f32) {
|
||||||
token_wtype = get_type(prefix + "token_embedding.weight", tensor_storage_map, GGML_TYPE_F32);
|
token_wtype = get_type(prefix + "token_embedding.weight", tensor_storage_map, GGML_TYPE_F32);
|
||||||
@ -575,13 +597,13 @@ public:
|
|||||||
force_clip_f32(force_clip_f32) {
|
force_clip_f32(force_clip_f32) {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* get_token_embed_weight() {
|
ggml_tensor* get_token_embed_weight() {
|
||||||
return params["token_embedding.weight"];
|
return params["token_embedding.weight"];
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* input_ids,
|
ggml_tensor* input_ids,
|
||||||
struct ggml_tensor* custom_embed_weight) {
|
ggml_tensor* custom_embed_weight) {
|
||||||
// input_ids: [N, n_token]
|
// input_ids: [N, n_token]
|
||||||
auto token_embed_weight = params["token_embedding.weight"];
|
auto token_embed_weight = params["token_embedding.weight"];
|
||||||
auto position_embed_weight = params["position_embedding.weight"];
|
auto position_embed_weight = params["position_embedding.weight"];
|
||||||
@ -602,13 +624,13 @@ public:
|
|||||||
class CLIPVisionEmbeddings : public GGMLBlock {
|
class CLIPVisionEmbeddings : public GGMLBlock {
|
||||||
protected:
|
protected:
|
||||||
int64_t embed_dim;
|
int64_t embed_dim;
|
||||||
int64_t num_channels;
|
int num_channels;
|
||||||
int64_t patch_size;
|
int patch_size;
|
||||||
int64_t image_size;
|
int image_size;
|
||||||
int64_t num_patches;
|
int num_patches;
|
||||||
int64_t num_positions;
|
int64_t num_positions;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
||||||
enum ggml_type patch_wtype = GGML_TYPE_F16;
|
enum ggml_type patch_wtype = GGML_TYPE_F16;
|
||||||
enum ggml_type class_wtype = GGML_TYPE_F32;
|
enum ggml_type class_wtype = GGML_TYPE_F32;
|
||||||
enum ggml_type position_wtype = GGML_TYPE_F32;
|
enum ggml_type position_wtype = GGML_TYPE_F32;
|
||||||
@ -620,9 +642,9 @@ protected:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
CLIPVisionEmbeddings(int64_t embed_dim,
|
CLIPVisionEmbeddings(int64_t embed_dim,
|
||||||
int64_t num_channels = 3,
|
int num_channels = 3,
|
||||||
int64_t patch_size = 14,
|
int patch_size = 14,
|
||||||
int64_t image_size = 224)
|
int image_size = 224)
|
||||||
: embed_dim(embed_dim),
|
: embed_dim(embed_dim),
|
||||||
num_channels(num_channels),
|
num_channels(num_channels),
|
||||||
patch_size(patch_size),
|
patch_size(patch_size),
|
||||||
@ -631,7 +653,7 @@ public:
|
|||||||
num_positions = num_patches + 1;
|
num_positions = num_patches + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* pixel_values) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* pixel_values) {
|
||||||
// pixel_values: [N, num_channels, image_size, image_size]
|
// pixel_values: [N, num_channels, image_size, image_size]
|
||||||
// return: [N, num_positions, embed_dim]
|
// return: [N, num_positions, embed_dim]
|
||||||
GGML_ASSERT(pixel_values->ne[0] == image_size && pixel_values->ne[1] == image_size && pixel_values->ne[2] == num_channels);
|
GGML_ASSERT(pixel_values->ne[0] == image_size && pixel_values->ne[1] == image_size && pixel_values->ne[2] == num_channels);
|
||||||
@ -641,18 +663,18 @@ public:
|
|||||||
auto position_embed_weight = params["position_embedding.weight"];
|
auto position_embed_weight = params["position_embedding.weight"];
|
||||||
|
|
||||||
// concat(patch_embedding, class_embedding) + position_embedding
|
// concat(patch_embedding, class_embedding) + position_embedding
|
||||||
struct ggml_tensor* patch_embedding;
|
ggml_tensor* patch_embedding;
|
||||||
int64_t N = pixel_values->ne[3];
|
int64_t N = pixel_values->ne[3];
|
||||||
patch_embedding = ggml_ext_conv_2d(ctx->ggml_ctx, pixel_values, patch_embed_weight, nullptr, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
|
patch_embedding = ggml_ext_conv_2d(ctx->ggml_ctx, pixel_values, patch_embed_weight, nullptr, patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
|
||||||
patch_embedding = ggml_reshape_3d(ctx->ggml_ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
|
patch_embedding = ggml_reshape_3d(ctx->ggml_ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
|
||||||
patch_embedding = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
|
patch_embedding = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, patch_embedding, 1, 0, 2, 3)); // [N, num_patches, embed_dim]
|
||||||
patch_embedding = ggml_reshape_4d(ctx->ggml_ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
|
patch_embedding = ggml_reshape_4d(ctx->ggml_ctx, patch_embedding, 1, embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
|
||||||
|
|
||||||
struct ggml_tensor* class_embedding = ggml_new_tensor_2d(ctx->ggml_ctx, GGML_TYPE_F32, embed_dim, N);
|
ggml_tensor* class_embedding = ggml_new_tensor_2d(ctx->ggml_ctx, GGML_TYPE_F32, embed_dim, N);
|
||||||
class_embedding = ggml_repeat(ctx->ggml_ctx, class_embed_weight, class_embedding); // [N, embed_dim]
|
class_embedding = ggml_repeat(ctx->ggml_ctx, class_embed_weight, class_embedding); // [N, embed_dim]
|
||||||
class_embedding = ggml_reshape_4d(ctx->ggml_ctx, class_embedding, 1, embed_dim, 1, N); // [N, 1, embed_dim, 1]
|
class_embedding = ggml_reshape_4d(ctx->ggml_ctx, class_embedding, 1, embed_dim, 1, N); // [N, 1, embed_dim, 1]
|
||||||
|
|
||||||
struct ggml_tensor* x = ggml_concat(ctx->ggml_ctx, class_embedding, patch_embedding, 2); // [N, num_positions, embed_dim, 1]
|
ggml_tensor* x = ggml_concat(ctx->ggml_ctx, class_embedding, patch_embedding, 2); // [N, num_positions, embed_dim, 1]
|
||||||
x = ggml_reshape_3d(ctx->ggml_ctx, x, embed_dim, num_positions, N); // [N, num_positions, embed_dim]
|
x = ggml_reshape_3d(ctx->ggml_ctx, x, embed_dim, num_positions, N); // [N, num_positions, embed_dim]
|
||||||
x = ggml_add(ctx->ggml_ctx, x, position_embed_weight);
|
x = ggml_add(ctx->ggml_ctx, x, position_embed_weight);
|
||||||
return x; // [N, num_positions, embed_dim]
|
return x; // [N, num_positions, embed_dim]
|
||||||
@ -671,7 +693,7 @@ enum CLIPVersion {
|
|||||||
|
|
||||||
class CLIPTextModel : public GGMLBlock {
|
class CLIPTextModel : public GGMLBlock {
|
||||||
protected:
|
protected:
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
||||||
if (version == OPEN_CLIP_VIT_BIGG_14) {
|
if (version == OPEN_CLIP_VIT_BIGG_14) {
|
||||||
enum ggml_type wtype = GGML_TYPE_F32;
|
enum ggml_type wtype = GGML_TYPE_F32;
|
||||||
params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
|
params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
|
||||||
@ -712,14 +734,15 @@ public:
|
|||||||
blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
|
blocks["final_layer_norm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* get_token_embed_weight() {
|
ggml_tensor* get_token_embed_weight() {
|
||||||
auto embeddings = std::dynamic_pointer_cast<CLIPEmbeddings>(blocks["embeddings"]);
|
auto embeddings = std::dynamic_pointer_cast<CLIPEmbeddings>(blocks["embeddings"]);
|
||||||
return embeddings->get_token_embed_weight();
|
return embeddings->get_token_embed_weight();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* input_ids,
|
ggml_tensor* input_ids,
|
||||||
struct ggml_tensor* tkn_embeddings,
|
ggml_tensor* tkn_embeddings,
|
||||||
|
ggml_tensor* mask = nullptr,
|
||||||
size_t max_token_idx = 0,
|
size_t max_token_idx = 0,
|
||||||
bool return_pooled = false,
|
bool return_pooled = false,
|
||||||
int clip_skip = -1) {
|
int clip_skip = -1) {
|
||||||
@ -729,7 +752,7 @@ public:
|
|||||||
auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]);
|
auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]);
|
||||||
|
|
||||||
auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size]
|
auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size]
|
||||||
x = encoder->forward(ctx, x, return_pooled ? -1 : clip_skip, true);
|
x = encoder->forward(ctx, x, mask, return_pooled ? -1 : clip_skip);
|
||||||
if (return_pooled || with_final_ln) {
|
if (return_pooled || with_final_ln) {
|
||||||
x = final_layer_norm->forward(ctx, x);
|
x = final_layer_norm->forward(ctx, x);
|
||||||
}
|
}
|
||||||
@ -781,8 +804,8 @@ public:
|
|||||||
blocks["post_layernorm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
|
blocks["post_layernorm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(hidden_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* pixel_values,
|
ggml_tensor* pixel_values,
|
||||||
bool return_pooled = true,
|
bool return_pooled = true,
|
||||||
int clip_skip = -1) {
|
int clip_skip = -1) {
|
||||||
// pixel_values: [N, num_channels, image_size, image_size]
|
// pixel_values: [N, num_channels, image_size, image_size]
|
||||||
@ -793,9 +816,10 @@ public:
|
|||||||
|
|
||||||
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
|
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
|
||||||
x = pre_layernorm->forward(ctx, x);
|
x = pre_layernorm->forward(ctx, x);
|
||||||
x = encoder->forward(ctx, x, clip_skip, false);
|
x = encoder->forward(ctx, x, nullptr, clip_skip);
|
||||||
// print_ggml_tensor(x, true, "ClipVisionModel x: ");
|
|
||||||
auto last_hidden_state = x;
|
auto last_hidden_state = x;
|
||||||
|
|
||||||
x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
|
x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
|
||||||
|
|
||||||
GGML_ASSERT(x->ne[3] == 1);
|
GGML_ASSERT(x->ne[3] == 1);
|
||||||
@ -815,7 +839,7 @@ protected:
|
|||||||
int64_t out_features;
|
int64_t out_features;
|
||||||
bool transpose_weight;
|
bool transpose_weight;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") override {
|
||||||
enum ggml_type wtype = get_type(prefix + "weight", tensor_storage_map, GGML_TYPE_F32);
|
enum ggml_type wtype = get_type(prefix + "weight", tensor_storage_map, GGML_TYPE_F32);
|
||||||
if (transpose_weight) {
|
if (transpose_weight) {
|
||||||
params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
|
params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
|
||||||
@ -832,8 +856,8 @@ public:
|
|||||||
out_features(out_features),
|
out_features(out_features),
|
||||||
transpose_weight(transpose_weight) {}
|
transpose_weight(transpose_weight) {}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) override {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) override {
|
||||||
struct ggml_tensor* w = params["weight"];
|
ggml_tensor* w = params["weight"];
|
||||||
if (transpose_weight) {
|
if (transpose_weight) {
|
||||||
w = ggml_cont(ctx->ggml_ctx, ggml_transpose(ctx->ggml_ctx, w));
|
w = ggml_cont(ctx->ggml_ctx, ggml_transpose(ctx->ggml_ctx, w));
|
||||||
}
|
}
|
||||||
@ -862,8 +886,8 @@ public:
|
|||||||
blocks["visual_projection"] = std::shared_ptr<GGMLBlock>(new CLIPProjection(hidden_size, projection_dim, transpose_proj_w));
|
blocks["visual_projection"] = std::shared_ptr<GGMLBlock>(new CLIPProjection(hidden_size, projection_dim, transpose_proj_w));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* pixel_values,
|
ggml_tensor* pixel_values,
|
||||||
bool return_pooled = true,
|
bool return_pooled = true,
|
||||||
int clip_skip = -1) {
|
int clip_skip = -1) {
|
||||||
// pixel_values: [N, num_channels, image_size, image_size]
|
// pixel_values: [N, num_channels, image_size, image_size]
|
||||||
@ -884,6 +908,8 @@ public:
|
|||||||
struct CLIPTextModelRunner : public GGMLRunner {
|
struct CLIPTextModelRunner : public GGMLRunner {
|
||||||
CLIPTextModel model;
|
CLIPTextModel model;
|
||||||
|
|
||||||
|
std::vector<float> attention_mask_vec;
|
||||||
|
|
||||||
CLIPTextModelRunner(ggml_backend_t backend,
|
CLIPTextModelRunner(ggml_backend_t backend,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
const String2TensorStorage& tensor_storage_map,
|
const String2TensorStorage& tensor_storage_map,
|
||||||
@ -910,13 +936,14 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||||||
return "clip";
|
return "clip";
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
model.get_param_tensors(tensors, prefix);
|
model.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* input_ids,
|
ggml_tensor* input_ids,
|
||||||
struct ggml_tensor* embeddings,
|
ggml_tensor* embeddings,
|
||||||
|
ggml_tensor* mask,
|
||||||
size_t max_token_idx = 0,
|
size_t max_token_idx = 0,
|
||||||
bool return_pooled = false,
|
bool return_pooled = false,
|
||||||
int clip_skip = -1) {
|
int clip_skip = -1) {
|
||||||
@ -927,20 +954,20 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||||||
input_ids = ggml_reshape_2d(ctx->ggml_ctx, input_ids, model.n_token, input_ids->ne[0] / model.n_token);
|
input_ids = ggml_reshape_2d(ctx->ggml_ctx, input_ids, model.n_token, input_ids->ne[0] / model.n_token);
|
||||||
}
|
}
|
||||||
|
|
||||||
return model.forward(ctx, input_ids, embeddings, max_token_idx, return_pooled, clip_skip);
|
return model.forward(ctx, input_ids, embeddings, mask, max_token_idx, return_pooled, clip_skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph(struct ggml_tensor* input_ids,
|
ggml_cgraph* build_graph(ggml_tensor* input_ids,
|
||||||
int num_custom_embeddings = 0,
|
int num_custom_embeddings = 0,
|
||||||
void* custom_embeddings_data = nullptr,
|
void* custom_embeddings_data = nullptr,
|
||||||
size_t max_token_idx = 0,
|
size_t max_token_idx = 0,
|
||||||
bool return_pooled = false,
|
bool return_pooled = false,
|
||||||
int clip_skip = -1) {
|
int clip_skip = -1) {
|
||||||
struct ggml_cgraph* gf = new_graph_custom(2048);
|
ggml_cgraph* gf = new_graph_custom(2048);
|
||||||
|
|
||||||
input_ids = to_backend(input_ids);
|
input_ids = to_backend(input_ids);
|
||||||
|
|
||||||
struct ggml_tensor* embeddings = nullptr;
|
ggml_tensor* embeddings = nullptr;
|
||||||
|
|
||||||
if (num_custom_embeddings > 0 && custom_embeddings_data != nullptr) {
|
if (num_custom_embeddings > 0 && custom_embeddings_data != nullptr) {
|
||||||
auto token_embed_weight = model.get_token_embed_weight();
|
auto token_embed_weight = model.get_token_embed_weight();
|
||||||
@ -954,17 +981,31 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||||||
embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 1);
|
embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int n_tokens = static_cast<int>(input_ids->ne[0]);
|
||||||
|
attention_mask_vec.resize(n_tokens * n_tokens);
|
||||||
|
for (int i0 = 0; i0 < n_tokens; i0++) {
|
||||||
|
for (int i1 = 0; i1 < n_tokens; i1++) {
|
||||||
|
float value = 0.f;
|
||||||
|
if (i0 > i1) {
|
||||||
|
value = -INFINITY;
|
||||||
|
}
|
||||||
|
attention_mask_vec[i1 * n_tokens + i0] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
auto attention_mask = ggml_new_tensor_2d(compute_ctx, GGML_TYPE_F32, n_tokens, n_tokens);
|
||||||
|
set_backend_tensor_data(attention_mask, attention_mask_vec.data());
|
||||||
|
|
||||||
auto runner_ctx = get_context();
|
auto runner_ctx = get_context();
|
||||||
|
|
||||||
struct ggml_tensor* hidden_states = forward(&runner_ctx, input_ids, embeddings, max_token_idx, return_pooled, clip_skip);
|
ggml_tensor* hidden_states = forward(&runner_ctx, input_ids, embeddings, attention_mask, max_token_idx, return_pooled, clip_skip);
|
||||||
|
|
||||||
ggml_build_forward_expand(gf, hidden_states);
|
ggml_build_forward_expand(gf, hidden_states);
|
||||||
|
|
||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(const int n_threads,
|
bool compute(const int n_threads,
|
||||||
struct ggml_tensor* input_ids,
|
ggml_tensor* input_ids,
|
||||||
int num_custom_embeddings,
|
int num_custom_embeddings,
|
||||||
void* custom_embeddings_data,
|
void* custom_embeddings_data,
|
||||||
size_t max_token_idx,
|
size_t max_token_idx,
|
||||||
@ -972,10 +1013,10 @@ struct CLIPTextModelRunner : public GGMLRunner {
|
|||||||
int clip_skip,
|
int clip_skip,
|
||||||
ggml_tensor** output,
|
ggml_tensor** output,
|
||||||
ggml_context* output_ctx = nullptr) {
|
ggml_context* output_ctx = nullptr) {
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
|
return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
|
||||||
};
|
};
|
||||||
GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
|
return GGMLRunner::compute(get_graph, n_threads, true, output, output_ctx);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef __COMMON_HPP__
|
#ifndef __COMMON_BLOCK_HPP__
|
||||||
#define __COMMON_HPP__
|
#define __COMMON_BLOCK_HPP__
|
||||||
|
|
||||||
#include "ggml_extend.hpp"
|
#include "ggml_extend.hpp"
|
||||||
|
|
||||||
@ -23,12 +23,12 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
if (vae_downsample) {
|
if (vae_downsample) {
|
||||||
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
|
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
|
||||||
|
|
||||||
x = ggml_pad(ctx->ggml_ctx, x, 1, 1, 0, 0);
|
x = ggml_ext_pad(ctx->ggml_ctx, x, 1, 1, 0, 0, ctx->circular_x_enabled, ctx->circular_y_enabled);
|
||||||
x = conv->forward(ctx, x);
|
x = conv->forward(ctx, x);
|
||||||
} else {
|
} else {
|
||||||
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["op"]);
|
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["op"]);
|
||||||
@ -52,7 +52,7 @@ public:
|
|||||||
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1}));
|
blocks["conv"] = std::shared_ptr<GGMLBlock>(new Conv2d(channels, out_channels, {3, 3}, {1, 1}, {1, 1}));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
|
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ protected:
|
|||||||
std::pair<int, int> padding) {
|
std::pair<int, int> padding) {
|
||||||
GGML_ASSERT(dims == 2 || dims == 3);
|
GGML_ASSERT(dims == 2 || dims == 3);
|
||||||
if (dims == 3) {
|
if (dims == 3) {
|
||||||
return std::shared_ptr<GGMLBlock>(new Conv3dnx1x1(in_channels, out_channels, kernel_size.first, 1, padding.first));
|
return std::shared_ptr<GGMLBlock>(new Conv3d(in_channels, out_channels, {kernel_size.first, 1, 1}, {1, 1, 1}, {padding.first, 0, 0}));
|
||||||
} else {
|
} else {
|
||||||
return std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, out_channels, kernel_size, {1, 1}, padding));
|
return std::shared_ptr<GGMLBlock>(new Conv2d(in_channels, out_channels, kernel_size, {1, 1}, padding));
|
||||||
}
|
}
|
||||||
@ -121,7 +121,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x, struct ggml_tensor* emb = nullptr) {
|
virtual ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x, ggml_tensor* emb = nullptr) {
|
||||||
// For dims==3, we reduce dimension from 5d to 4d by merging h and w, in order not to change ggml
|
// For dims==3, we reduce dimension from 5d to 4d by merging h and w, in order not to change ggml
|
||||||
// [N, c, t, h, w] => [N, c, t, h * w]
|
// [N, c, t, h, w] => [N, c, t, h * w]
|
||||||
// x: [N, channels, h, w] if dims == 2 else [N, channels, t, h, w]
|
// x: [N, channels, h, w] if dims == 2 else [N, channels, t, h, w]
|
||||||
@ -188,17 +188,19 @@ public:
|
|||||||
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out * 2));
|
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out * 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) override {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) override {
|
||||||
// x: [ne3, ne2, ne1, dim_in]
|
// x: [ne3, ne2, ne1, dim_in]
|
||||||
// return: [ne3, ne2, ne1, dim_out]
|
// return: [ne3, ne2, ne1, dim_out]
|
||||||
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
||||||
|
|
||||||
x = proj->forward(ctx, x); // [ne3, ne2, ne1, dim_out*2]
|
x = proj->forward(ctx, x); // [ne3, ne2, ne1, dim_out*2]
|
||||||
auto x_vec = ggml_ext_chunk(ctx->ggml_ctx, x, 2, 0);
|
auto x_vec = ggml_ext_chunk(ctx->ggml_ctx, x, 2, 0, false);
|
||||||
x = x_vec[0]; // [ne3, ne2, ne1, dim_out]
|
x = x_vec[0]; // [ne3, ne2, ne1, dim_out]
|
||||||
auto gate = x_vec[1]; // [ne3, ne2, ne1, dim_out]
|
auto gate = x_vec[1]; // [ne3, ne2, ne1, dim_out]
|
||||||
|
|
||||||
gate = ggml_gelu_inplace(ctx->ggml_ctx, gate);
|
gate = ggml_cont(ctx->ggml_ctx, gate);
|
||||||
|
|
||||||
|
gate = ggml_ext_gelu(ctx->ggml_ctx, gate, true);
|
||||||
|
|
||||||
x = ggml_mul(ctx->ggml_ctx, x, gate); // [ne3, ne2, ne1, dim_out]
|
x = ggml_mul(ctx->ggml_ctx, x, gate); // [ne3, ne2, ne1, dim_out]
|
||||||
|
|
||||||
@ -212,13 +214,13 @@ public:
|
|||||||
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out, bias));
|
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out, bias));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) override {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) override {
|
||||||
// x: [ne3, ne2, ne1, dim_in]
|
// x: [ne3, ne2, ne1, dim_in]
|
||||||
// return: [ne3, ne2, ne1, dim_out]
|
// return: [ne3, ne2, ne1, dim_out]
|
||||||
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
||||||
|
|
||||||
x = proj->forward(ctx, x);
|
x = proj->forward(ctx, x);
|
||||||
x = ggml_gelu_inplace(ctx->ggml_ctx, x);
|
x = ggml_ext_gelu(ctx->ggml_ctx, x, true);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -256,7 +258,7 @@ public:
|
|||||||
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32, scale));
|
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, force_prec_f32, scale));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [ne3, ne2, ne1, dim]
|
// x: [ne3, ne2, ne1, dim]
|
||||||
// return: [ne3, ne2, ne1, dim_out]
|
// return: [ne3, ne2, ne1, dim_out]
|
||||||
|
|
||||||
@ -295,9 +297,9 @@ public:
|
|||||||
// to_out_1 is nn.Dropout(), skip for inference
|
// to_out_1 is nn.Dropout(), skip for inference
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* context) {
|
ggml_tensor* context) {
|
||||||
// x: [N, n_token, query_dim]
|
// x: [N, n_token, query_dim]
|
||||||
// context: [N, n_context, context_dim]
|
// context: [N, n_context, context_dim]
|
||||||
// return: [N, n_token, query_dim]
|
// return: [N, n_token, query_dim]
|
||||||
@ -315,7 +317,7 @@ public:
|
|||||||
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
|
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
|
||||||
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]
|
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]
|
||||||
|
|
||||||
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, n_head, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_token, inner_dim]
|
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, q, k, v, n_head, nullptr, false, ctx->flash_attn_enabled); // [N, n_token, inner_dim]
|
||||||
|
|
||||||
x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
|
x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
|
||||||
return x;
|
return x;
|
||||||
@ -353,9 +355,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* context) {
|
ggml_tensor* context) {
|
||||||
// x: [N, n_token, query_dim]
|
// x: [N, n_token, query_dim]
|
||||||
// context: [N, n_context, context_dim]
|
// context: [N, n_context, context_dim]
|
||||||
// return: [N, n_token, query_dim]
|
// return: [N, n_token, query_dim]
|
||||||
@ -404,7 +406,7 @@ protected:
|
|||||||
int64_t context_dim = 768; // hidden_size, 1024 for VERSION_SD2
|
int64_t context_dim = 768; // hidden_size, 1024 for VERSION_SD2
|
||||||
bool use_linear = false;
|
bool use_linear = false;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, const std::string prefix = "") {
|
||||||
auto iter = tensor_storage_map.find(prefix + "proj_out.weight");
|
auto iter = tensor_storage_map.find(prefix + "proj_out.weight");
|
||||||
if (iter != tensor_storage_map.end()) {
|
if (iter != tensor_storage_map.end()) {
|
||||||
int64_t inner_dim = n_head * d_head;
|
int64_t inner_dim = n_head * d_head;
|
||||||
@ -454,9 +456,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
virtual ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* context) {
|
ggml_tensor* context) {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
// context: [N, max_position(aka n_token), hidden_size(aka context_dim)]
|
// context: [N, max_position(aka n_token), hidden_size(aka context_dim)]
|
||||||
auto norm = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm"]);
|
auto norm = std::dynamic_pointer_cast<GroupNorm32>(blocks["norm"]);
|
||||||
@ -508,7 +510,7 @@ public:
|
|||||||
|
|
||||||
class AlphaBlender : public GGMLBlock {
|
class AlphaBlender : public GGMLBlock {
|
||||||
protected:
|
protected:
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, std::string prefix = "") override {
|
||||||
// Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
|
// Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
|
||||||
enum ggml_type wtype = GGML_TYPE_F32;
|
enum ggml_type wtype = GGML_TYPE_F32;
|
||||||
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
|
||||||
@ -528,23 +530,23 @@ public:
|
|||||||
// since mix_factor.shape is [1,], we don't need rearrange using rearrange_pattern
|
// since mix_factor.shape is [1,], we don't need rearrange using rearrange_pattern
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x_spatial,
|
ggml_tensor* x_spatial,
|
||||||
struct ggml_tensor* x_temporal) {
|
ggml_tensor* x_temporal) {
|
||||||
// image_only_indicator is always tensor([0.])
|
// image_only_indicator is always tensor([0.])
|
||||||
float alpha = get_alpha();
|
float alpha = get_alpha();
|
||||||
auto x = ggml_add(ctx->ggml_ctx,
|
auto x = ggml_add(ctx->ggml_ctx,
|
||||||
ggml_scale(ctx->ggml_ctx, x_spatial, alpha),
|
ggml_ext_scale(ctx->ggml_ctx, x_spatial, alpha),
|
||||||
ggml_scale(ctx->ggml_ctx, x_temporal, 1.0f - alpha));
|
ggml_ext_scale(ctx->ggml_ctx, x_temporal, 1.0f - alpha));
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class VideoResBlock : public ResBlock {
|
class VideoResBlock : public ResBlock {
|
||||||
public:
|
public:
|
||||||
VideoResBlock(int channels,
|
VideoResBlock(int64_t channels,
|
||||||
int emb_channels,
|
int64_t emb_channels,
|
||||||
int out_channels,
|
int64_t out_channels,
|
||||||
std::pair<int, int> kernel_size = {3, 3},
|
std::pair<int, int> kernel_size = {3, 3},
|
||||||
int64_t video_kernel_size = 3,
|
int64_t video_kernel_size = 3,
|
||||||
int dims = 2) // always 2
|
int dims = 2) // always 2
|
||||||
@ -553,9 +555,9 @@ public:
|
|||||||
blocks["time_mixer"] = std::shared_ptr<GGMLBlock>(new AlphaBlender());
|
blocks["time_mixer"] = std::shared_ptr<GGMLBlock>(new AlphaBlender());
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* emb,
|
ggml_tensor* emb,
|
||||||
int num_video_frames) {
|
int num_video_frames) {
|
||||||
// x: [N, channels, h, w] aka [b*t, channels, h, w]
|
// x: [N, channels, h, w] aka [b*t, channels, h, w]
|
||||||
// emb: [N, emb_channels] aka [b*t, emb_channels]
|
// emb: [N, emb_channels] aka [b*t, emb_channels]
|
||||||
@ -588,4 +590,4 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __COMMON_HPP__
|
#endif // __COMMON_BLOCK_HPP__
|
||||||
108
src/common_dit.hpp
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#ifndef __COMMON_DIT_HPP__
|
||||||
|
#define __COMMON_DIT_HPP__
|
||||||
|
|
||||||
|
#include "ggml_extend.hpp"
|
||||||
|
|
||||||
|
namespace DiT {
|
||||||
|
// Cut an image-like tensor into non-overlapping ph x pw patches and flatten
// every patch into one token.
//
// NOTE: this function takes the patch WIDTH before the patch HEIGHT (pw, ph),
// whereas the other helpers in this namespace take (ph, pw). Callers must pass
// the arguments in this order.
//
// ctx        : ggml context in which the new graph nodes are created
// x          : [N, C, H, W]
// pw         : patch width, must divide W exactly (asserted)
// ph         : patch height, must divide H exactly (asserted)
// patch_last : selects how channel and in-patch position are interleaved in
//              the flattened token (see return)
// return     : [N, h*w, C*ph*pw] if patch_last else [N, h*w, ph*pw*C],
//              with h = H / ph and w = W / pw
//
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit yields multiple definitions.
inline ggml_tensor* patchify(ggml_context* ctx,
                             ggml_tensor* x,
                             int pw,
                             int ph,
                             bool patch_last = true) {
    const int64_t N = x->ne[3];
    const int64_t C = x->ne[2];
    const int64_t H = x->ne[1];
    const int64_t W = x->ne[0];
    const int64_t h = H / ph;  // patches along the height
    const int64_t w = W / pw;  // patches along the width

    // Input must already be padded to a whole number of patches.
    GGML_ASSERT(h * ph == H && w * pw == W);

    // Separate "which patch" from "where inside the patch" on both spatial axes.
    x = ggml_reshape_4d(ctx, x, pw, w, ph, h * C * N);     // [N*C*h, ph, w, pw]
    x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N*C*h, w, ph, pw]
    x = ggml_reshape_4d(ctx, x, pw * ph, w * h, C, N);     // [N, C, h*w, ph*pw]

    if (patch_last) {
        x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N, h*w, C, ph*pw]
        x = ggml_reshape_3d(ctx, x, pw * ph * C, w * h, N);    // [N, h*w, C*ph*pw]
    } else {
        // Channel becomes the innermost axis so the flattened token is ph*pw*C.
        x = ggml_cont(ctx, ggml_ext_torch_permute(ctx, x, 2, 0, 1, 3));  // [N, h*w, ph*pw, C]
        x = ggml_reshape_3d(ctx, x, C * pw * ph, w * h, N);              // [N, h*w, ph*pw*C]
    }
    return x;
}
|
||||||
|
|
||||||
|
// Inverse of patchify: reassemble a sequence of flattened patches into an
// image-like tensor.
//
// ctx        : ggml context in which the new graph nodes are created
// x          : [N, h*w, C*ph*pw] if patch_last else [N, h*w, ph*pw*C]
// h, w       : number of patches along the height / width
// ph, pw     : patch height / width
// patch_last : must match the layout that produced x
// return     : [N, C, H, W] with H = h*ph and W = w*pw
//
// The channel count is derived from the token size; the token size must be an
// exact multiple of ph*pw (asserted).
//
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit yields multiple definitions.
inline ggml_tensor* unpatchify(ggml_context* ctx,
                               ggml_tensor* x,
                               int64_t h,
                               int64_t w,
                               int ph,
                               int pw,
                               bool patch_last = true) {
    const int64_t N = x->ne[2];
    const int64_t C = x->ne[0] / ph / pw;
    const int64_t H = h * ph;
    const int64_t W = w * pw;

    GGML_ASSERT(C * ph * pw == x->ne[0]);

    // Bring both token layouts to the common form [N, C, h*w, ph*pw].
    if (patch_last) {
        x = ggml_reshape_4d(ctx, x, pw * ph, C, w * h, N);     // [N, h*w, C, ph*pw]
        x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N, C, h*w, ph*pw]
    } else {
        x = ggml_reshape_4d(ctx, x, C, pw * ph, w * h, N);     // [N, h*w, ph*pw, C]
        x = ggml_cont(ctx, ggml_permute(ctx, x, 2, 0, 1, 3));  // [N, C, h*w, ph*pw]
    }

    // Interleave patch index and in-patch offset back into full rows/columns.
    x = ggml_reshape_4d(ctx, x, pw, ph, w, h * C * N);     // [N*C*h, w, ph, pw]
    x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3));  // [N*C*h, ph, w, pw]
    x = ggml_reshape_4d(ctx, x, W, H, C, N);               // [N, C, h*ph, w*pw]

    return x;
}
|
||||||
|
|
||||||
|
// Pad the two spatial axes of x so that H becomes a multiple of ph and W a
// multiple of pw. When an axis is already a multiple, its pad amount is 0.
//
// ctx    : runner context; supplies the ggml context and the circular-padding
//          flags that are forwarded to ggml_ext_pad
// x      : tensor whose ne[0] is W and ne[1] is H
// ph, pw : patch height / width
// return : the padded tensor
//
// NOTE(review): where the padding is placed (leading vs trailing edge) is
// decided by ggml_ext_pad, which is not visible here - confirm against it.
//
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit yields multiple definitions.
inline ggml_tensor* pad_to_patch_size(GGMLRunnerContext* ctx,
                                      ggml_tensor* x,
                                      int ph,
                                      int pw) {
    const int64_t W = x->ne[0];
    const int64_t H = x->ne[1];

    // Distance to the next multiple; the outer "% p" maps "already aligned"
    // to 0 instead of a full extra patch. The result is always < p, so the
    // narrowing to int is lossless.
    const int pad_h = static_cast<int>((ph - H % ph) % ph);
    const int pad_w = static_cast<int>((pw - W % pw) % pw);

    x = ggml_ext_pad(ctx->ggml_ctx, x, pad_w, pad_h, 0, 0, ctx->circular_x_enabled, ctx->circular_y_enabled);
    return x;
}
|
||||||
|
|
||||||
|
// Pad x to a whole number of patches, then split it into flattened patches.
//
// ctx        : runner context (ggml context + circular-padding flags)
// x          : [N, C, H, W]
// ph, pw     : patch height / width
// patch_last : forwarded to patchify; selects the token layout
// return     : the result of patchify on the padded tensor
//
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit yields multiple definitions.
inline ggml_tensor* pad_and_patchify(GGMLRunnerContext* ctx,
                                     ggml_tensor* x,
                                     int ph,
                                     int pw,
                                     bool patch_last = true) {
    x = pad_to_patch_size(ctx, x, ph, pw);
    // patchify is declared as (ctx, x, pw, ph, ...): width first. Passing
    // (ph, pw) would swap the two and break any non-square patch size.
    x = patchify(ctx->ggml_ctx, x, pw, ph, patch_last);
    return x;
}
|
||||||
|
|
||||||
|
// Reassemble patches into an image-like tensor and cut it back to the
// original, un-padded H x W.
//
// ctx        : ggml context in which the new graph nodes are created
// x          : patch sequence in the layout accepted by unpatchify
// H, W       : target (un-padded) height / width
// ph, pw     : patch height / width
// patch_last : must match the layout that produced x
// return     : [N, C, H, W]
//
// The pad amounts are recomputed with the same formula pad_to_patch_size
// uses, so the patch-grid size here matches the one produced there.
//
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit yields multiple definitions.
inline ggml_tensor* unpatchify_and_crop(ggml_context* ctx,
                                        ggml_tensor* x,
                                        int64_t H,
                                        int64_t W,
                                        int ph,
                                        int pw,
                                        bool patch_last = true) {
    // Always < ph / < pw, so the narrowing to int is lossless.
    const int pad_h = static_cast<int>((ph - H % ph) % ph);
    const int pad_w = static_cast<int>((pw - W % pw) % pw);

    // Patch-grid size of the padded image.
    const int64_t h = (H + pad_h) / ph;
    const int64_t w = (W + pad_w) / pw;

    x = unpatchify(ctx, x, h, w, ph, pw, patch_last);  // [N, C, H + pad_h, W + pad_w]
    x = ggml_ext_slice(ctx, x, 1, 0, H);               // [N, C, H, W + pad_w]
    x = ggml_ext_slice(ctx, x, 0, 0, W);               // [N, C, H, W]
    return x;
}
|
||||||
|
} // namespace DiT
|
||||||
|
|
||||||
|
#endif // __COMMON_DIT_HPP__
|
||||||
@ -1,8 +1,7 @@
|
|||||||
#ifndef __CONTROL_HPP__
|
#ifndef __CONTROL_HPP__
|
||||||
#define __CONTROL_HPP__
|
#define __CONTROL_HPP__
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "common_block.hpp"
|
||||||
#include "ggml_extend.hpp"
|
|
||||||
#include "model.h"
|
#include "model.h"
|
||||||
|
|
||||||
#define CONTROL_NET_GRAPH_SIZE 1536
|
#define CONTROL_NET_GRAPH_SIZE 1536
|
||||||
@ -165,26 +164,26 @@ public:
|
|||||||
blocks["middle_block_out.0"] = std::shared_ptr<GGMLBlock>(make_zero_conv(ch));
|
blocks["middle_block_out.0"] = std::shared_ptr<GGMLBlock>(make_zero_conv(ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* resblock_forward(std::string name,
|
ggml_tensor* resblock_forward(std::string name,
|
||||||
GGMLRunnerContext* ctx,
|
GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* emb) {
|
ggml_tensor* emb) {
|
||||||
auto block = std::dynamic_pointer_cast<ResBlock>(blocks[name]);
|
auto block = std::dynamic_pointer_cast<ResBlock>(blocks[name]);
|
||||||
return block->forward(ctx, x, emb);
|
return block->forward(ctx, x, emb);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* attention_layer_forward(std::string name,
|
ggml_tensor* attention_layer_forward(std::string name,
|
||||||
GGMLRunnerContext* ctx,
|
GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* context) {
|
ggml_tensor* context) {
|
||||||
auto block = std::dynamic_pointer_cast<SpatialTransformer>(blocks[name]);
|
auto block = std::dynamic_pointer_cast<SpatialTransformer>(blocks[name]);
|
||||||
return block->forward(ctx, x, context);
|
return block->forward(ctx, x, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* input_hint_block_forward(GGMLRunnerContext* ctx,
|
ggml_tensor* input_hint_block_forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* hint,
|
ggml_tensor* hint,
|
||||||
struct ggml_tensor* emb,
|
ggml_tensor* emb,
|
||||||
struct ggml_tensor* context) {
|
ggml_tensor* context) {
|
||||||
int num_input_blocks = 15;
|
int num_input_blocks = 15;
|
||||||
auto h = hint;
|
auto h = hint;
|
||||||
for (int i = 0; i < num_input_blocks; i++) {
|
for (int i = 0; i < num_input_blocks; i++) {
|
||||||
@ -199,13 +198,13 @@ public:
|
|||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<struct ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
std::vector<ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* hint,
|
ggml_tensor* hint,
|
||||||
struct ggml_tensor* guided_hint,
|
ggml_tensor* guided_hint,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* y = nullptr) {
|
ggml_tensor* y = nullptr) {
|
||||||
// x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
|
// x: [N, in_channels, h, w] or [N, in_channels/2, h, w]
|
||||||
// timesteps: [N,]
|
// timesteps: [N,]
|
||||||
// context: [N, max_position, hidden_size] or [1, max_position, hidden_size]. for example, [N, 77, 768]
|
// context: [N, max_position, hidden_size] or [1, max_position, hidden_size]. for example, [N, 77, 768]
|
||||||
@ -247,7 +246,7 @@ public:
|
|||||||
emb = ggml_add(ctx->ggml_ctx, emb, label_emb); // [N, time_embed_dim]
|
emb = ggml_add(ctx->ggml_ctx, emb, label_emb); // [N, time_embed_dim]
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<struct ggml_tensor*> outs;
|
std::vector<ggml_tensor*> outs;
|
||||||
|
|
||||||
if (guided_hint == nullptr) {
|
if (guided_hint == nullptr) {
|
||||||
guided_hint = input_hint_block_forward(ctx, hint, emb, context);
|
guided_hint = input_hint_block_forward(ctx, hint, emb, context);
|
||||||
@ -313,8 +312,8 @@ struct ControlNet : public GGMLRunner {
|
|||||||
|
|
||||||
ggml_backend_buffer_t control_buffer = nullptr; // keep control output tensors in backend memory
|
ggml_backend_buffer_t control_buffer = nullptr; // keep control output tensors in backend memory
|
||||||
ggml_context* control_ctx = nullptr;
|
ggml_context* control_ctx = nullptr;
|
||||||
std::vector<struct ggml_tensor*> controls; // (12 input block outputs, 1 middle block output) SD 1.5
|
std::vector<ggml_tensor*> controls; // (12 input block outputs, 1 middle block output) SD 1.5
|
||||||
struct ggml_tensor* guided_hint = nullptr; // guided_hint cache, for faster inference
|
ggml_tensor* guided_hint = nullptr; // guided_hint cache, for faster inference
|
||||||
bool guided_hint_cached = false;
|
bool guided_hint_cached = false;
|
||||||
|
|
||||||
ControlNet(ggml_backend_t backend,
|
ControlNet(ggml_backend_t backend,
|
||||||
@ -329,8 +328,8 @@ struct ControlNet : public GGMLRunner {
|
|||||||
free_control_ctx();
|
free_control_ctx();
|
||||||
}
|
}
|
||||||
|
|
||||||
void alloc_control_ctx(std::vector<struct ggml_tensor*> outs) {
|
void alloc_control_ctx(std::vector<ggml_tensor*> outs) {
|
||||||
struct ggml_init_params params;
|
ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(outs.size() * ggml_tensor_overhead()) + 1024 * 1024;
|
params.mem_size = static_cast<size_t>(outs.size() * ggml_tensor_overhead()) + 1024 * 1024;
|
||||||
params.mem_buffer = nullptr;
|
params.mem_buffer = nullptr;
|
||||||
params.no_alloc = true;
|
params.no_alloc = true;
|
||||||
@ -371,16 +370,16 @@ struct ControlNet : public GGMLRunner {
|
|||||||
return "control_net";
|
return "control_net";
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
control_net.get_param_tensors(tensors, prefix);
|
control_net.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph(struct ggml_tensor* x,
|
ggml_cgraph* build_graph(ggml_tensor* x,
|
||||||
struct ggml_tensor* hint,
|
ggml_tensor* hint,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* y = nullptr) {
|
ggml_tensor* y = nullptr) {
|
||||||
struct ggml_cgraph* gf = new_graph_custom(CONTROL_NET_GRAPH_SIZE);
|
ggml_cgraph* gf = new_graph_custom(CONTROL_NET_GRAPH_SIZE);
|
||||||
|
|
||||||
x = to_backend(x);
|
x = to_backend(x);
|
||||||
if (guided_hint_cached) {
|
if (guided_hint_cached) {
|
||||||
@ -414,25 +413,29 @@ struct ControlNet : public GGMLRunner {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
bool compute(int n_threads,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* hint,
|
ggml_tensor* hint,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* y,
|
ggml_tensor* y,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) {
|
ggml_context* output_ctx = nullptr) {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
// timesteps: [N, ]
|
// timesteps: [N, ]
|
||||||
// context: [N, max_position, hidden_size]([N, 77, 768]) or [1, max_position, hidden_size]
|
// context: [N, max_position, hidden_size]([N, 77, 768]) or [1, max_position, hidden_size]
|
||||||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_graph(x, hint, timesteps, context, y);
|
return build_graph(x, hint, timesteps, context, y);
|
||||||
};
|
};
|
||||||
|
|
||||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
bool res = GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
|
if (res) {
|
||||||
|
// cache guided_hint
|
||||||
guided_hint_cached = true;
|
guided_hint_cached = true;
|
||||||
}
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
bool load_from_file(const std::string& file_path, int n_threads) {
|
bool load_from_file(const std::string& file_path, int n_threads) {
|
||||||
LOG_INFO("loading control net from '%s'", file_path.c_str());
|
LOG_INFO("loading control net from '%s'", file_path.c_str());
|
||||||
@ -1,43 +1,46 @@
|
|||||||
#ifndef __DIFFUSION_MODEL_H__
|
#ifndef __DIFFUSION_MODEL_H__
|
||||||
#define __DIFFUSION_MODEL_H__
|
#define __DIFFUSION_MODEL_H__
|
||||||
|
|
||||||
|
#include "anima.hpp"
|
||||||
#include "flux.hpp"
|
#include "flux.hpp"
|
||||||
#include "mmdit.hpp"
|
#include "mmdit.hpp"
|
||||||
#include "qwen_image.hpp"
|
#include "qwen_image.hpp"
|
||||||
#include "unet.hpp"
|
#include "unet.hpp"
|
||||||
#include "wan.hpp"
|
#include "wan.hpp"
|
||||||
|
#include "z_image.hpp"
|
||||||
|
|
||||||
struct DiffusionParams {
|
struct DiffusionParams {
|
||||||
struct ggml_tensor* x = nullptr;
|
ggml_tensor* x = nullptr;
|
||||||
struct ggml_tensor* timesteps = nullptr;
|
ggml_tensor* timesteps = nullptr;
|
||||||
struct ggml_tensor* context = nullptr;
|
ggml_tensor* context = nullptr;
|
||||||
struct ggml_tensor* c_concat = nullptr;
|
ggml_tensor* c_concat = nullptr;
|
||||||
struct ggml_tensor* y = nullptr;
|
ggml_tensor* y = nullptr;
|
||||||
struct ggml_tensor* guidance = nullptr;
|
ggml_tensor* guidance = nullptr;
|
||||||
std::vector<ggml_tensor*> ref_latents = {};
|
std::vector<ggml_tensor*> ref_latents = {};
|
||||||
bool increase_ref_index = false;
|
bool increase_ref_index = false;
|
||||||
int num_video_frames = -1;
|
int num_video_frames = -1;
|
||||||
std::vector<struct ggml_tensor*> controls = {};
|
std::vector<ggml_tensor*> controls = {};
|
||||||
float control_strength = 0.f;
|
float control_strength = 0.f;
|
||||||
struct ggml_tensor* vace_context = nullptr;
|
ggml_tensor* vace_context = nullptr;
|
||||||
float vace_strength = 1.f;
|
float vace_strength = 1.f;
|
||||||
std::vector<int> skip_layers = {};
|
std::vector<int> skip_layers = {};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct DiffusionModel {
|
struct DiffusionModel {
|
||||||
virtual std::string get_desc() = 0;
|
virtual std::string get_desc() = 0;
|
||||||
virtual void compute(int n_threads,
|
virtual bool compute(int n_threads,
|
||||||
DiffusionParams diffusion_params,
|
DiffusionParams diffusion_params,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) = 0;
|
ggml_context* output_ctx = nullptr) = 0;
|
||||||
virtual void alloc_params_buffer() = 0;
|
virtual void alloc_params_buffer() = 0;
|
||||||
virtual void free_params_buffer() = 0;
|
virtual void free_params_buffer() = 0;
|
||||||
virtual void free_compute_buffer() = 0;
|
virtual void free_compute_buffer() = 0;
|
||||||
virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;
|
virtual void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) = 0;
|
||||||
virtual size_t get_params_buffer_size() = 0;
|
virtual size_t get_params_buffer_size() = 0;
|
||||||
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter){};
|
virtual void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter){};
|
||||||
virtual int64_t get_adm_in_channels() = 0;
|
virtual int64_t get_adm_in_channels() = 0;
|
||||||
virtual void set_flash_attn_enabled(bool enabled) = 0;
|
virtual void set_flash_attention_enabled(bool enabled) = 0;
|
||||||
|
virtual void set_circular_axes(bool circular_x, bool circular_y) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct UNetModel : public DiffusionModel {
|
struct UNetModel : public DiffusionModel {
|
||||||
@ -66,7 +69,7 @@ struct UNetModel : public DiffusionModel {
|
|||||||
unet.free_compute_buffer();
|
unet.free_compute_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
unet.get_param_tensors(tensors, "model.diffusion_model");
|
unet.get_param_tensors(tensors, "model.diffusion_model");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,14 +85,18 @@ struct UNetModel : public DiffusionModel {
|
|||||||
return unet.unet.adm_in_channels;
|
return unet.unet.adm_in_channels;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_flash_attn_enabled(bool enabled) {
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
unet.set_flash_attention_enabled(enabled);
|
unet.set_flash_attention_enabled(enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
unet.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
DiffusionParams diffusion_params,
|
DiffusionParams diffusion_params,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) override {
|
ggml_context* output_ctx = nullptr) override {
|
||||||
return unet.compute(n_threads,
|
return unet.compute(n_threads,
|
||||||
diffusion_params.x,
|
diffusion_params.x,
|
||||||
diffusion_params.timesteps,
|
diffusion_params.timesteps,
|
||||||
@ -127,7 +134,7 @@ struct MMDiTModel : public DiffusionModel {
|
|||||||
mmdit.free_compute_buffer();
|
mmdit.free_compute_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
mmdit.get_param_tensors(tensors, "model.diffusion_model");
|
mmdit.get_param_tensors(tensors, "model.diffusion_model");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,14 +150,18 @@ struct MMDiTModel : public DiffusionModel {
|
|||||||
return 768 + 1280;
|
return 768 + 1280;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_flash_attn_enabled(bool enabled) {
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
mmdit.set_flash_attention_enabled(enabled);
|
mmdit.set_flash_attention_enabled(enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
mmdit.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
DiffusionParams diffusion_params,
|
DiffusionParams diffusion_params,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) override {
|
ggml_context* output_ctx = nullptr) override {
|
||||||
return mmdit.compute(n_threads,
|
return mmdit.compute(n_threads,
|
||||||
diffusion_params.x,
|
diffusion_params.x,
|
||||||
diffusion_params.timesteps,
|
diffusion_params.timesteps,
|
||||||
@ -189,7 +200,7 @@ struct FluxModel : public DiffusionModel {
|
|||||||
flux.free_compute_buffer();
|
flux.free_compute_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
flux.get_param_tensors(tensors, "model.diffusion_model");
|
flux.get_param_tensors(tensors, "model.diffusion_model");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -205,14 +216,18 @@ struct FluxModel : public DiffusionModel {
|
|||||||
return 768;
|
return 768;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_flash_attn_enabled(bool enabled) {
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
flux.set_flash_attention_enabled(enabled);
|
flux.set_flash_attention_enabled(enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
flux.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
DiffusionParams diffusion_params,
|
DiffusionParams diffusion_params,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) override {
|
ggml_context* output_ctx = nullptr) override {
|
||||||
return flux.compute(n_threads,
|
return flux.compute(n_threads,
|
||||||
diffusion_params.x,
|
diffusion_params.x,
|
||||||
diffusion_params.timesteps,
|
diffusion_params.timesteps,
|
||||||
@ -228,6 +243,72 @@ struct FluxModel : public DiffusionModel {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct AnimaModel : public DiffusionModel {
|
||||||
|
std::string prefix;
|
||||||
|
Anima::AnimaRunner anima;
|
||||||
|
|
||||||
|
AnimaModel(ggml_backend_t backend,
|
||||||
|
bool offload_params_to_cpu,
|
||||||
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
|
const std::string prefix = "model.diffusion_model")
|
||||||
|
: prefix(prefix), anima(backend, offload_params_to_cpu, tensor_storage_map, prefix) {
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_desc() override {
|
||||||
|
return anima.get_desc();
|
||||||
|
}
|
||||||
|
|
||||||
|
void alloc_params_buffer() override {
|
||||||
|
anima.alloc_params_buffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_params_buffer() override {
|
||||||
|
anima.free_params_buffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_compute_buffer() override {
|
||||||
|
anima.free_compute_buffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
|
anima.get_param_tensors(tensors, prefix);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_params_buffer_size() override {
|
||||||
|
return anima.get_params_buffer_size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
|
||||||
|
anima.set_weight_adapter(adapter);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t get_adm_in_channels() override {
|
||||||
|
return 768;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
|
anima.set_flash_attention_enabled(enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
anima.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
|
DiffusionParams diffusion_params,
|
||||||
|
ggml_tensor** output = nullptr,
|
||||||
|
ggml_context* output_ctx = nullptr) override {
|
||||||
|
return anima.compute(n_threads,
|
||||||
|
diffusion_params.x,
|
||||||
|
diffusion_params.timesteps,
|
||||||
|
diffusion_params.context,
|
||||||
|
diffusion_params.c_concat,
|
||||||
|
diffusion_params.y,
|
||||||
|
output,
|
||||||
|
output_ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct WanModel : public DiffusionModel {
|
struct WanModel : public DiffusionModel {
|
||||||
std::string prefix;
|
std::string prefix;
|
||||||
WAN::WanRunner wan;
|
WAN::WanRunner wan;
|
||||||
@ -256,7 +337,7 @@ struct WanModel : public DiffusionModel {
|
|||||||
wan.free_compute_buffer();
|
wan.free_compute_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
wan.get_param_tensors(tensors, prefix);
|
wan.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -272,14 +353,18 @@ struct WanModel : public DiffusionModel {
|
|||||||
return 768;
|
return 768;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_flash_attn_enabled(bool enabled) {
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
wan.set_flash_attention_enabled(enabled);
|
wan.set_flash_attention_enabled(enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
wan.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
DiffusionParams diffusion_params,
|
DiffusionParams diffusion_params,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) override {
|
ggml_context* output_ctx = nullptr) override {
|
||||||
return wan.compute(n_threads,
|
return wan.compute(n_threads,
|
||||||
diffusion_params.x,
|
diffusion_params.x,
|
||||||
diffusion_params.timesteps,
|
diffusion_params.timesteps,
|
||||||
@ -302,8 +387,9 @@ struct QwenImageModel : public DiffusionModel {
|
|||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "model.diffusion_model",
|
const std::string prefix = "model.diffusion_model",
|
||||||
SDVersion version = VERSION_QWEN_IMAGE)
|
SDVersion version = VERSION_QWEN_IMAGE,
|
||||||
: prefix(prefix), qwen_image(backend, offload_params_to_cpu, tensor_storage_map, prefix, version) {
|
bool zero_cond_t = false)
|
||||||
|
: prefix(prefix), qwen_image(backend, offload_params_to_cpu, tensor_storage_map, prefix, version, zero_cond_t) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string get_desc() override {
|
std::string get_desc() override {
|
||||||
@ -322,7 +408,7 @@ struct QwenImageModel : public DiffusionModel {
|
|||||||
qwen_image.free_compute_buffer();
|
qwen_image.free_compute_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) override {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
qwen_image.get_param_tensors(tensors, prefix);
|
qwen_image.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -338,14 +424,18 @@ struct QwenImageModel : public DiffusionModel {
|
|||||||
return 768;
|
return 768;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_flash_attn_enabled(bool enabled) {
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
qwen_image.set_flash_attention_enabled(enabled);
|
qwen_image.set_flash_attention_enabled(enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
qwen_image.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
DiffusionParams diffusion_params,
|
DiffusionParams diffusion_params,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) override {
|
ggml_context* output_ctx = nullptr) override {
|
||||||
return qwen_image.compute(n_threads,
|
return qwen_image.compute(n_threads,
|
||||||
diffusion_params.x,
|
diffusion_params.x,
|
||||||
diffusion_params.timesteps,
|
diffusion_params.timesteps,
|
||||||
@ -357,4 +447,71 @@ struct QwenImageModel : public DiffusionModel {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ZImageModel : public DiffusionModel {
|
||||||
|
std::string prefix;
|
||||||
|
ZImage::ZImageRunner z_image;
|
||||||
|
|
||||||
|
ZImageModel(ggml_backend_t backend,
|
||||||
|
bool offload_params_to_cpu,
|
||||||
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
|
const std::string prefix = "model.diffusion_model",
|
||||||
|
SDVersion version = VERSION_Z_IMAGE)
|
||||||
|
: prefix(prefix), z_image(backend, offload_params_to_cpu, tensor_storage_map, prefix, version) {
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_desc() override {
|
||||||
|
return z_image.get_desc();
|
||||||
|
}
|
||||||
|
|
||||||
|
void alloc_params_buffer() override {
|
||||||
|
z_image.alloc_params_buffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_params_buffer() override {
|
||||||
|
z_image.free_params_buffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_compute_buffer() override {
|
||||||
|
z_image.free_compute_buffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors) override {
|
||||||
|
z_image.get_param_tensors(tensors, prefix);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_params_buffer_size() override {
|
||||||
|
return z_image.get_params_buffer_size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_weight_adapter(const std::shared_ptr<WeightAdapter>& adapter) override {
|
||||||
|
z_image.set_weight_adapter(adapter);
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t get_adm_in_channels() override {
|
||||||
|
return 768;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_flash_attention_enabled(bool enabled) {
|
||||||
|
z_image.set_flash_attention_enabled(enabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_circular_axes(bool circular_x, bool circular_y) override {
|
||||||
|
z_image.set_circular_axes(circular_x, circular_y);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool compute(int n_threads,
|
||||||
|
DiffusionParams diffusion_params,
|
||||||
|
ggml_tensor** output = nullptr,
|
||||||
|
ggml_context* output_ctx = nullptr) override {
|
||||||
|
return z_image.compute(n_threads,
|
||||||
|
diffusion_params.x,
|
||||||
|
diffusion_params.timesteps,
|
||||||
|
diffusion_params.context,
|
||||||
|
diffusion_params.ref_latents,
|
||||||
|
true, // increase_ref_index
|
||||||
|
output,
|
||||||
|
output_ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@ -27,11 +27,11 @@ public:
|
|||||||
blocks["conv5"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat + 4 * num_grow_ch, num_feat, {3, 3}, {1, 1}, {1, 1}));
|
blocks["conv5"] = std::shared_ptr<GGMLBlock>(new Conv2d(num_feat + 4 * num_grow_ch, num_feat, {3, 3}, {1, 1}, {1, 1}));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* lrelu(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* lrelu(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
return ggml_leaky_relu(ctx->ggml_ctx, x, 0.2f, true);
|
return ggml_leaky_relu(ctx->ggml_ctx, x, 0.2f, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [n, num_feat, h, w]
|
// x: [n, num_feat, h, w]
|
||||||
// return: [n, num_feat, h, w]
|
// return: [n, num_feat, h, w]
|
||||||
|
|
||||||
@ -51,7 +51,7 @@ public:
|
|||||||
x_cat = ggml_concat(ctx->ggml_ctx, x_cat, x4, 2);
|
x_cat = ggml_concat(ctx->ggml_ctx, x_cat, x4, 2);
|
||||||
auto x5 = conv5->forward(ctx, x_cat);
|
auto x5 = conv5->forward(ctx, x_cat);
|
||||||
|
|
||||||
x5 = ggml_add(ctx->ggml_ctx, ggml_scale(ctx->ggml_ctx, x5, 0.2f), x);
|
x5 = ggml_add(ctx->ggml_ctx, ggml_ext_scale(ctx->ggml_ctx, x5, 0.2f), x);
|
||||||
return x5;
|
return x5;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -64,7 +64,7 @@ public:
|
|||||||
blocks["rdb3"] = std::shared_ptr<GGMLBlock>(new ResidualDenseBlock(num_feat, num_grow_ch));
|
blocks["rdb3"] = std::shared_ptr<GGMLBlock>(new ResidualDenseBlock(num_feat, num_grow_ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [n, num_feat, h, w]
|
// x: [n, num_feat, h, w]
|
||||||
// return: [n, num_feat, h, w]
|
// return: [n, num_feat, h, w]
|
||||||
|
|
||||||
@ -76,7 +76,7 @@ public:
|
|||||||
out = rdb2->forward(ctx, out);
|
out = rdb2->forward(ctx, out);
|
||||||
out = rdb3->forward(ctx, out);
|
out = rdb3->forward(ctx, out);
|
||||||
|
|
||||||
out = ggml_add(ctx->ggml_ctx, ggml_scale(ctx->ggml_ctx, out, 0.2f), x);
|
out = ggml_add(ctx->ggml_ctx, ggml_ext_scale(ctx->ggml_ctx, out, 0.2f), x);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -112,11 +112,11 @@ public:
|
|||||||
int get_scale() { return scale; }
|
int get_scale() { return scale; }
|
||||||
int get_num_block() { return num_block; }
|
int get_num_block() { return num_block; }
|
||||||
|
|
||||||
struct ggml_tensor* lrelu(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* lrelu(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
return ggml_leaky_relu(ctx->ggml_ctx, x, 0.2f, true);
|
return ggml_leaky_relu(ctx->ggml_ctx, x, 0.2f, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [n, num_in_ch, h, w]
|
// x: [n, num_in_ch, h, w]
|
||||||
// return: [n, num_out_ch, h*scale, w*scale]
|
// return: [n, num_out_ch, h*scale, w*scale]
|
||||||
auto conv_first = std::dynamic_pointer_cast<Conv2d>(blocks["conv_first"]);
|
auto conv_first = std::dynamic_pointer_cast<Conv2d>(blocks["conv_first"]);
|
||||||
@ -156,9 +156,10 @@ struct ESRGAN : public GGMLRunner {
|
|||||||
|
|
||||||
ESRGAN(ggml_backend_t backend,
|
ESRGAN(ggml_backend_t backend,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
|
int tile_size = 128,
|
||||||
const String2TensorStorage& tensor_storage_map = {})
|
const String2TensorStorage& tensor_storage_map = {})
|
||||||
: GGMLRunner(backend, offload_params_to_cpu) {
|
: GGMLRunner(backend, offload_params_to_cpu) {
|
||||||
// rrdb_net will be created in load_from_file
|
this->tile_size = tile_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string get_desc() override {
|
std::string get_desc() override {
|
||||||
@ -340,27 +341,27 @@ struct ESRGAN : public GGMLRunner {
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph(struct ggml_tensor* x) {
|
ggml_cgraph* build_graph(ggml_tensor* x) {
|
||||||
if (!rrdb_net)
|
if (!rrdb_net)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
constexpr int kGraphNodes = 1 << 16; // 65k
|
constexpr int kGraphNodes = 1 << 16; // 65k
|
||||||
struct ggml_cgraph* gf = new_graph_custom(kGraphNodes);
|
ggml_cgraph* gf = new_graph_custom(kGraphNodes);
|
||||||
x = to_backend(x);
|
x = to_backend(x);
|
||||||
|
|
||||||
auto runner_ctx = get_context();
|
auto runner_ctx = get_context();
|
||||||
struct ggml_tensor* out = rrdb_net->forward(&runner_ctx, x);
|
ggml_tensor* out = rrdb_net->forward(&runner_ctx, x);
|
||||||
ggml_build_forward_expand(gf, out);
|
ggml_build_forward_expand(gf, out);
|
||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(const int n_threads,
|
bool compute(const int n_threads,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
ggml_tensor** output,
|
ggml_tensor** output,
|
||||||
ggml_context* output_ctx = nullptr) {
|
ggml_context* output_ctx = nullptr) {
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_graph(x);
|
return build_graph(x);
|
||||||
};
|
};
|
||||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -151,7 +151,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (n_dims > GGML_MAX_DIMS) {
|
if (n_dims > GGML_MAX_DIMS) {
|
||||||
for (int i = GGML_MAX_DIMS; i < n_dims; i++) {
|
for (uint32_t i = GGML_MAX_DIMS; i < n_dims; i++) {
|
||||||
info.shape[GGML_MAX_DIMS - 1] *= info.shape[i]; // stack to last dim;
|
info.shape[GGML_MAX_DIMS - 1] *= info.shape[i]; // stack to last dim;
|
||||||
}
|
}
|
||||||
info.shape.resize(GGML_MAX_DIMS);
|
info.shape.resize(GGML_MAX_DIMS);
|
||||||
@ -91,6 +91,41 @@ const float flux_latent_rgb_proj[16][3] = {
|
|||||||
{-0.111849f, -0.055589f, -0.032361f}};
|
{-0.111849f, -0.055589f, -0.032361f}};
|
||||||
float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f};
|
float flux_latent_rgb_bias[3] = {0.024600f, -0.006937f, -0.008089f};
|
||||||
|
|
||||||
|
// 32 x 3 projection table: each row holds three float weights.
// NOTE(review): presumably one row per FLUX.2 latent channel and one column per
// R/G/B output, consumed like the other *_latent_rgb_proj tables when building
// latent previews - confirm against the caller.
const float flux2_latent_rgb_proj[32][3] = {
|
||||||
|
{0.000736f, -0.008385f, -0.019710f},
|
||||||
|
{-0.001352f, -0.016392f, 0.020693f},
|
||||||
|
{-0.006376f, 0.002428f, 0.036736f},
|
||||||
|
{0.039384f, 0.074167f, 0.119789f},
|
||||||
|
{0.007464f, -0.005705f, -0.004734f},
|
||||||
|
{-0.004086f, 0.005287f, -0.000409f},
|
||||||
|
{-0.032835f, 0.050802f, -0.028120f},
|
||||||
|
{-0.003158f, -0.000835f, 0.000406f},
|
||||||
|
{-0.112840f, -0.084337f, -0.023083f},
|
||||||
|
{0.001462f, -0.006656f, 0.000549f},
|
||||||
|
{-0.009980f, -0.007480f, 0.009702f},
|
||||||
|
{0.032540f, 0.000214f, -0.061388f},
|
||||||
|
{0.011023f, 0.000694f, 0.007143f},
|
||||||
|
{-0.001468f, -0.006723f, -0.001678f},
|
||||||
|
{-0.005921f, -0.010320f, -0.003907f},
|
||||||
|
{-0.028434f, 0.027584f, 0.018457f},
|
||||||
|
{0.014349f, 0.011523f, 0.000441f},
|
||||||
|
{0.009874f, 0.003081f, 0.001507f},
|
||||||
|
{0.002218f, 0.005712f, 0.001563f},
|
||||||
|
{0.053010f, -0.019844f, 0.008683f},
|
||||||
|
{-0.002507f, 0.005384f, 0.000938f},
|
||||||
|
{-0.002177f, -0.011366f, 0.003559f},
|
||||||
|
{-0.000261f, 0.015121f, -0.003240f},
|
||||||
|
{-0.003944f, -0.002083f, 0.005043f},
|
||||||
|
{-0.009138f, 0.011336f, 0.003781f},
|
||||||
|
{0.011429f, 0.003985f, -0.003855f},
|
||||||
|
{0.010518f, -0.005586f, 0.010131f},
|
||||||
|
{0.007883f, 0.002912f, -0.001473f},
|
||||||
|
{-0.003318f, -0.003160f, 0.003684f},
|
||||||
|
{-0.034560f, -0.008740f, 0.012996f},
|
||||||
|
{0.000166f, 0.001079f, -0.012153f},
|
||||||
|
{0.017772f, 0.000937f, -0.011953f}};
|
||||||
|
// Three-element companion to flux2_latent_rgb_proj.
// NOTE(review): presumably a per-colour-channel offset added after the
// projection - confirm against the caller.
float flux2_latent_rgb_bias[3] = {-0.028738f, -0.098463f, -0.107619f};
|
||||||
|
|
||||||
// This one was taken straight from
|
// This one was taken straight from
|
||||||
// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
|
// https://github.com/Stability-AI/sd3.5/blob/8565799a3b41eb0c7ba976d18375f0f753f56402/sd3_impls.py#L288-L303
|
||||||
// (MIT License)
|
// (MIT License)
|
||||||
@ -128,16 +163,42 @@ const float sd_latent_rgb_proj[4][3] = {
|
|||||||
{-0.178022f, -0.200862f, -0.678514f}};
|
{-0.178022f, -0.200862f, -0.678514f}};
|
||||||
float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f};
|
float sd_latent_rgb_bias[3] = {-0.017478f, -0.055834f, -0.105825f};
|
||||||
|
|
||||||
void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int width, int height, int frames, int dim) {
|
void preview_latent_video(uint8_t* buffer, ggml_tensor* latents, const float (*latent_rgb_proj)[3], const float latent_rgb_bias[3], int patch_size) {
|
||||||
size_t buffer_head = 0;
|
size_t buffer_head = 0;
|
||||||
for (int k = 0; k < frames; k++) {
|
|
||||||
for (int j = 0; j < height; j++) {
|
uint32_t latent_width = static_cast<uint32_t>(latents->ne[0]);
|
||||||
for (int i = 0; i < width; i++) {
|
uint32_t latent_height = static_cast<uint32_t>(latents->ne[1]);
|
||||||
size_t latent_id = (i * latents->nb[0] + j * latents->nb[1] + k * latents->nb[2]);
|
uint32_t dim = static_cast<uint32_t>(latents->ne[ggml_n_dims(latents) - 1]);
|
||||||
|
uint32_t frames = 1;
|
||||||
|
if (ggml_n_dims(latents) == 4) {
|
||||||
|
frames = static_cast<uint32_t>(latents->ne[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t rgb_width = latent_width * patch_size;
|
||||||
|
uint32_t rgb_height = latent_height * patch_size;
|
||||||
|
|
||||||
|
uint32_t unpatched_dim = dim / (patch_size * patch_size);
|
||||||
|
|
||||||
|
for (uint32_t k = 0; k < frames; k++) {
|
||||||
|
for (uint32_t rgb_x = 0; rgb_x < rgb_width; rgb_x++) {
|
||||||
|
for (uint32_t rgb_y = 0; rgb_y < rgb_height; rgb_y++) {
|
||||||
|
int latent_x = rgb_x / patch_size;
|
||||||
|
int latent_y = rgb_y / patch_size;
|
||||||
|
|
||||||
|
int channel_offset = 0;
|
||||||
|
if (patch_size > 1) {
|
||||||
|
channel_offset = ((rgb_y % patch_size) * patch_size + (rgb_x % patch_size));
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t latent_id = (latent_x * latents->nb[0] + latent_y * latents->nb[1] + k * latents->nb[2]);
|
||||||
|
|
||||||
|
// should be incremented by 1 for each pixel
|
||||||
|
size_t pixel_id = k * rgb_width * rgb_height + rgb_y * rgb_width + rgb_x;
|
||||||
|
|
||||||
float r = 0, g = 0, b = 0;
|
float r = 0, g = 0, b = 0;
|
||||||
if (latent_rgb_proj != nullptr) {
|
if (latent_rgb_proj != nullptr) {
|
||||||
for (int d = 0; d < dim; d++) {
|
for (uint32_t d = 0; d < unpatched_dim; d++) {
|
||||||
float value = *(float*)((char*)latents->data + latent_id + d * latents->nb[ggml_n_dims(latents) - 1]);
|
float value = *(float*)((char*)latents->data + latent_id + (d * patch_size * patch_size + channel_offset) * latents->nb[ggml_n_dims(latents) - 1]);
|
||||||
r += value * latent_rgb_proj[d][0];
|
r += value * latent_rgb_proj[d][0];
|
||||||
g += value * latent_rgb_proj[d][1];
|
g += value * latent_rgb_proj[d][1];
|
||||||
b += value * latent_rgb_proj[d][2];
|
b += value * latent_rgb_proj[d][2];
|
||||||
@ -164,9 +225,9 @@ void preview_latent_video(uint8_t* buffer, struct ggml_tensor* latents, const fl
|
|||||||
g = g >= 0 ? g <= 1 ? g : 1 : 0;
|
g = g >= 0 ? g <= 1 ? g : 1 : 0;
|
||||||
b = b >= 0 ? b <= 1 ? b : 1 : 0;
|
b = b >= 0 ? b <= 1 ? b : 1 : 0;
|
||||||
|
|
||||||
buffer[buffer_head++] = (uint8_t)(r * 255);
|
buffer[pixel_id * 3 + 0] = (uint8_t)(r * 255);
|
||||||
buffer[buffer_head++] = (uint8_t)(g * 255);
|
buffer[pixel_id * 3 + 1] = (uint8_t)(g * 255);
|
||||||
buffer[buffer_head++] = (uint8_t)(b * 255);
|
buffer[pixel_id * 3 + 2] = (uint8_t)(b * 255);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -9,7 +9,7 @@
|
|||||||
struct LoraModel : public GGMLRunner {
|
struct LoraModel : public GGMLRunner {
|
||||||
std::string lora_id;
|
std::string lora_id;
|
||||||
float multiplier = 1.0f;
|
float multiplier = 1.0f;
|
||||||
std::unordered_map<std::string, struct ggml_tensor*> lora_tensors;
|
std::unordered_map<std::string, ggml_tensor*> lora_tensors;
|
||||||
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
|
std::map<ggml_tensor*, ggml_tensor*> original_tensor_to_final_tensor;
|
||||||
std::set<std::string> applied_lora_tensors;
|
std::set<std::string> applied_lora_tensors;
|
||||||
std::string file_path;
|
std::string file_path;
|
||||||
@ -78,7 +78,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
for (const auto& pair : tensors_to_create) {
|
for (const auto& pair : tensors_to_create) {
|
||||||
const auto& name = pair.first;
|
const auto& name = pair.first;
|
||||||
const auto& ts = pair.second;
|
const auto& ts = pair.second;
|
||||||
struct ggml_tensor* real = ggml_new_tensor(params_ctx,
|
ggml_tensor* real = ggml_new_tensor(params_ctx,
|
||||||
ts.type,
|
ts.type,
|
||||||
ts.n_dims,
|
ts.n_dims,
|
||||||
ts.ne);
|
ts.ne);
|
||||||
@ -195,7 +195,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
scale_value *= multiplier;
|
scale_value *= multiplier;
|
||||||
|
|
||||||
auto curr_updown = ggml_ext_merge_lora(ctx, lora_down, lora_up, lora_mid);
|
auto curr_updown = ggml_ext_merge_lora(ctx, lora_down, lora_up, lora_mid);
|
||||||
curr_updown = ggml_scale_inplace(ctx, curr_updown, scale_value);
|
curr_updown = ggml_ext_scale(ctx, curr_updown, scale_value, true);
|
||||||
|
|
||||||
if (updown == nullptr) {
|
if (updown == nullptr) {
|
||||||
updown = curr_updown;
|
updown = curr_updown;
|
||||||
@ -235,7 +235,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
float scale_value = 1.0f;
|
float scale_value = 1.0f;
|
||||||
scale_value *= multiplier;
|
scale_value *= multiplier;
|
||||||
|
|
||||||
curr_updown = ggml_scale_inplace(ctx, curr_updown, scale_value);
|
curr_updown = ggml_ext_scale(ctx, curr_updown, scale_value, true);
|
||||||
|
|
||||||
if (updown == nullptr) {
|
if (updown == nullptr) {
|
||||||
updown = curr_updown;
|
updown = curr_updown;
|
||||||
@ -337,10 +337,10 @@ struct LoraModel : public GGMLRunner {
|
|||||||
}
|
}
|
||||||
scale_value *= multiplier;
|
scale_value *= multiplier;
|
||||||
|
|
||||||
struct ggml_tensor* updown_1 = ggml_ext_merge_lora(ctx, hada_1_down, hada_1_up, hada_1_mid);
|
ggml_tensor* updown_1 = ggml_ext_merge_lora(ctx, hada_1_down, hada_1_up, hada_1_mid);
|
||||||
struct ggml_tensor* updown_2 = ggml_ext_merge_lora(ctx, hada_2_down, hada_2_up, hada_2_mid);
|
ggml_tensor* updown_2 = ggml_ext_merge_lora(ctx, hada_2_down, hada_2_up, hada_2_mid);
|
||||||
auto curr_updown = ggml_mul_inplace(ctx, updown_1, updown_2);
|
auto curr_updown = ggml_mul_inplace(ctx, updown_1, updown_2);
|
||||||
curr_updown = ggml_scale_inplace(ctx, curr_updown, scale_value);
|
curr_updown = ggml_ext_scale(ctx, curr_updown, scale_value, true);
|
||||||
if (updown == nullptr) {
|
if (updown == nullptr) {
|
||||||
updown = curr_updown;
|
updown = curr_updown;
|
||||||
} else {
|
} else {
|
||||||
@ -456,7 +456,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
scale_value *= multiplier;
|
scale_value *= multiplier;
|
||||||
|
|
||||||
auto curr_updown = ggml_ext_kronecker(ctx, lokr_w1, lokr_w2);
|
auto curr_updown = ggml_ext_kronecker(ctx, lokr_w1, lokr_w2);
|
||||||
curr_updown = ggml_scale_inplace(ctx, curr_updown, scale_value);
|
curr_updown = ggml_ext_scale(ctx, curr_updown, scale_value, true);
|
||||||
|
|
||||||
if (updown == nullptr) {
|
if (updown == nullptr) {
|
||||||
updown = curr_updown;
|
updown = curr_updown;
|
||||||
@ -468,10 +468,10 @@ struct LoraModel : public GGMLRunner {
|
|||||||
return updown;
|
return updown;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_tensor* get_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_tensor* model_tensor, bool with_lora = true) {
|
ggml_tensor* get_weight_diff(const std::string& model_tensor_name, ggml_context* ctx, ggml_tensor* model_tensor, bool with_lora_and_lokr = true) {
|
||||||
// lora
|
// lora
|
||||||
ggml_tensor* diff = nullptr;
|
ggml_tensor* diff = nullptr;
|
||||||
if (with_lora) {
|
if (with_lora_and_lokr) {
|
||||||
diff = get_lora_weight_diff(model_tensor_name, ctx);
|
diff = get_lora_weight_diff(model_tensor_name, ctx);
|
||||||
}
|
}
|
||||||
// diff
|
// diff
|
||||||
@ -483,7 +483,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
diff = get_loha_weight_diff(model_tensor_name, ctx);
|
diff = get_loha_weight_diff(model_tensor_name, ctx);
|
||||||
}
|
}
|
||||||
// lokr
|
// lokr
|
||||||
if (diff == nullptr) {
|
if (diff == nullptr && with_lora_and_lokr) {
|
||||||
diff = get_lokr_weight_diff(model_tensor_name, ctx);
|
diff = get_lokr_weight_diff(model_tensor_name, ctx);
|
||||||
}
|
}
|
||||||
if (diff != nullptr) {
|
if (diff != nullptr) {
|
||||||
@ -514,6 +514,108 @@ struct LoraModel : public GGMLRunner {
|
|||||||
} else {
|
} else {
|
||||||
key = model_tensor_name + "." + std::to_string(index);
|
key = model_tensor_name + "." + std::to_string(index);
|
||||||
}
|
}
|
||||||
|
bool is_conv2d = forward_params.op_type == WeightAdapter::ForwardParams::op_type_t::OP_CONV2D;
|
||||||
|
|
||||||
|
std::string lokr_w1_name = "lora." + key + ".lokr_w1";
|
||||||
|
std::string lokr_w1_a_name = "lora." + key + ".lokr_w1_a";
|
||||||
|
// if either of these is found, then we have a lokr lora
|
||||||
|
auto iter = lora_tensors.find(lokr_w1_name);
|
||||||
|
auto iter_a = lora_tensors.find(lokr_w1_a_name);
|
||||||
|
if (iter != lora_tensors.end() || iter_a != lora_tensors.end()) {
|
||||||
|
std::string lokr_w1_b_name = "lora." + key + ".lokr_w1_b";
|
||||||
|
std::string lokr_w2_name = "lora." + key + ".lokr_w2";
|
||||||
|
std::string lokr_w2_a_name = "lora." + key + ".lokr_w2_a";
|
||||||
|
std::string lokr_w2_b_name = "lora." + key + ".lokr_w2_b";
|
||||||
|
std::string alpha_name = "lora." + key + ".alpha";
|
||||||
|
|
||||||
|
ggml_tensor* lokr_w1 = nullptr;
|
||||||
|
ggml_tensor* lokr_w1_a = nullptr;
|
||||||
|
ggml_tensor* lokr_w1_b = nullptr;
|
||||||
|
ggml_tensor* lokr_w2 = nullptr;
|
||||||
|
ggml_tensor* lokr_w2_a = nullptr;
|
||||||
|
ggml_tensor* lokr_w2_b = nullptr;
|
||||||
|
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
lokr_w1 = iter->second;
|
||||||
|
}
|
||||||
|
iter = iter_a;
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
lokr_w1_a = iter->second;
|
||||||
|
}
|
||||||
|
iter = lora_tensors.find(lokr_w1_b_name);
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
lokr_w1_b = iter->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
iter = lora_tensors.find(lokr_w2_name);
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
lokr_w2 = iter->second;
|
||||||
|
if (is_conv2d && lokr_w2->type != GGML_TYPE_F16) {
|
||||||
|
lokr_w2 = ggml_cast(ctx, lokr_w2, GGML_TYPE_F16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iter = lora_tensors.find(lokr_w2_a_name);
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
lokr_w2_a = iter->second;
|
||||||
|
if (is_conv2d && lokr_w2_a->type != GGML_TYPE_F16) {
|
||||||
|
lokr_w2_a = ggml_cast(ctx, lokr_w2_a, GGML_TYPE_F16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
iter = lora_tensors.find(lokr_w2_b_name);
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
lokr_w2_b = iter->second;
|
||||||
|
if (is_conv2d && lokr_w2_b->type != GGML_TYPE_F16) {
|
||||||
|
lokr_w2_b = ggml_cast(ctx, lokr_w2_b, GGML_TYPE_F16);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int rank = 1;
|
||||||
|
if (lokr_w1_b) {
|
||||||
|
rank = (int)lokr_w1_b->ne[ggml_n_dims(lokr_w1_b) - 1];
|
||||||
|
}
|
||||||
|
if (lokr_w2_b) {
|
||||||
|
rank = (int)lokr_w2_b->ne[ggml_n_dims(lokr_w2_b) - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
float scale_value = 1.0f;
|
||||||
|
iter = lora_tensors.find(alpha_name);
|
||||||
|
if (iter != lora_tensors.end()) {
|
||||||
|
float alpha = ggml_ext_backend_tensor_get_f32(iter->second);
|
||||||
|
scale_value = alpha / rank;
|
||||||
|
applied_lora_tensors.insert(alpha_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rank == 1) {
|
||||||
|
scale_value = 1.0f;
|
||||||
|
}
|
||||||
|
scale_value *= multiplier;
|
||||||
|
|
||||||
|
auto curr_out_diff = ggml_ext_lokr_forward(ctx, x, lokr_w1, lokr_w1_a, lokr_w1_b, lokr_w2, lokr_w2_a, lokr_w2_b, is_conv2d, forward_params.conv2d, scale_value);
|
||||||
|
if (out_diff == nullptr) {
|
||||||
|
out_diff = curr_out_diff;
|
||||||
|
} else {
|
||||||
|
out_diff = ggml_concat(ctx, out_diff, curr_out_diff, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lokr_w1)
|
||||||
|
applied_lora_tensors.insert(lokr_w1_name);
|
||||||
|
if (lokr_w1_a)
|
||||||
|
applied_lora_tensors.insert(lokr_w1_a_name);
|
||||||
|
if (lokr_w1_b)
|
||||||
|
applied_lora_tensors.insert(lokr_w1_b_name);
|
||||||
|
if (lokr_w2)
|
||||||
|
applied_lora_tensors.insert(lokr_w2_name);
|
||||||
|
if (lokr_w2_a)
|
||||||
|
applied_lora_tensors.insert(lokr_w2_name);
|
||||||
|
if (lokr_w2_b)
|
||||||
|
applied_lora_tensors.insert(lokr_w2_b_name);
|
||||||
|
applied_lora_tensors.insert(alpha_name);
|
||||||
|
|
||||||
|
index++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// not a lokr, normal lora path
|
||||||
|
|
||||||
std::string lora_down_name = "lora." + key + ".lora_down";
|
std::string lora_down_name = "lora." + key + ".lora_down";
|
||||||
std::string lora_up_name = "lora." + key + ".lora_up";
|
std::string lora_up_name = "lora." + key + ".lora_up";
|
||||||
@ -525,9 +627,7 @@ struct LoraModel : public GGMLRunner {
|
|||||||
ggml_tensor* lora_mid = nullptr;
|
ggml_tensor* lora_mid = nullptr;
|
||||||
ggml_tensor* lora_down = nullptr;
|
ggml_tensor* lora_down = nullptr;
|
||||||
|
|
||||||
bool is_conv2d = forward_params.op_type == WeightAdapter::ForwardParams::op_type_t::OP_CONV2D;
|
iter = lora_tensors.find(lora_up_name);
|
||||||
|
|
||||||
auto iter = lora_tensors.find(lora_up_name);
|
|
||||||
if (iter != lora_tensors.end()) {
|
if (iter != lora_tensors.end()) {
|
||||||
lora_up = iter->second;
|
lora_up = iter->second;
|
||||||
if (is_conv2d && lora_up->type != GGML_TYPE_F16) {
|
if (is_conv2d && lora_up->type != GGML_TYPE_F16) {
|
||||||
@ -599,6 +699,8 @@ struct LoraModel : public GGMLRunner {
|
|||||||
forward_params.conv2d.d0,
|
forward_params.conv2d.d0,
|
||||||
forward_params.conv2d.d1,
|
forward_params.conv2d.d1,
|
||||||
forward_params.conv2d.direct,
|
forward_params.conv2d.direct,
|
||||||
|
forward_params.conv2d.circular_x,
|
||||||
|
forward_params.conv2d.circular_y,
|
||||||
forward_params.conv2d.scale);
|
forward_params.conv2d.scale);
|
||||||
if (lora_mid) {
|
if (lora_mid) {
|
||||||
lx = ggml_ext_conv_2d(ctx,
|
lx = ggml_ext_conv_2d(ctx,
|
||||||
@ -612,6 +714,8 @@ struct LoraModel : public GGMLRunner {
|
|||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
forward_params.conv2d.direct,
|
forward_params.conv2d.direct,
|
||||||
|
forward_params.conv2d.circular_x,
|
||||||
|
forward_params.conv2d.circular_y,
|
||||||
forward_params.conv2d.scale);
|
forward_params.conv2d.scale);
|
||||||
}
|
}
|
||||||
lx = ggml_ext_conv_2d(ctx,
|
lx = ggml_ext_conv_2d(ctx,
|
||||||
@ -625,10 +729,12 @@ struct LoraModel : public GGMLRunner {
|
|||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
forward_params.conv2d.direct,
|
forward_params.conv2d.direct,
|
||||||
|
forward_params.conv2d.circular_x,
|
||||||
|
forward_params.conv2d.circular_y,
|
||||||
forward_params.conv2d.scale);
|
forward_params.conv2d.scale);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto curr_out_diff = ggml_scale_inplace(ctx, lx, scale_value);
|
auto curr_out_diff = ggml_ext_scale(ctx, lx, scale_value, true);
|
||||||
|
|
||||||
if (out_diff == nullptr) {
|
if (out_diff == nullptr) {
|
||||||
out_diff = curr_out_diff;
|
out_diff = curr_out_diff;
|
||||||
@ -641,9 +747,9 @@ struct LoraModel : public GGMLRunner {
|
|||||||
return out_diff;
|
return out_diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_lora_graph(const std::map<std::string, ggml_tensor*>& model_tensors, SDVersion version) {
|
ggml_cgraph* build_lora_graph(const std::map<std::string, ggml_tensor*>& model_tensors, SDVersion version) {
|
||||||
size_t lora_graph_size = LORA_GRAPH_BASE_SIZE + lora_tensors.size() * 10;
|
size_t lora_graph_size = LORA_GRAPH_BASE_SIZE + lora_tensors.size() * 10;
|
||||||
struct ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, lora_graph_size, false);
|
ggml_cgraph* gf = ggml_new_graph_custom(compute_ctx, lora_graph_size, false);
|
||||||
|
|
||||||
preprocess_lora_tensors(model_tensors);
|
preprocess_lora_tensors(model_tensors);
|
||||||
|
|
||||||
@ -682,8 +788,8 @@ struct LoraModel : public GGMLRunner {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void apply(std::map<std::string, struct ggml_tensor*> model_tensors, SDVersion version, int n_threads) {
|
void apply(std::map<std::string, ggml_tensor*> model_tensors, SDVersion version, int n_threads) {
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_lora_graph(model_tensors, version);
|
return build_lora_graph(model_tensors, version);
|
||||||
};
|
};
|
||||||
GGMLRunner::compute(get_graph, n_threads, false);
|
GGMLRunner::compute(get_graph, n_threads, false);
|
||||||
@ -735,9 +841,9 @@ public:
|
|||||||
: lora_models(lora_models) {
|
: lora_models(lora_models) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_tensor* patch_weight(ggml_context* ctx, ggml_tensor* weight, const std::string& weight_name, bool with_lora) {
|
ggml_tensor* patch_weight(ggml_context* ctx, ggml_tensor* weight, const std::string& weight_name, bool with_lora_and_lokr) {
|
||||||
for (auto& lora_model : lora_models) {
|
for (auto& lora_model : lora_models) {
|
||||||
ggml_tensor* diff = lora_model->get_weight_diff(weight_name, ctx, weight, with_lora);
|
ggml_tensor* diff = lora_model->get_weight_diff(weight_name, ctx, weight, with_lora_and_lokr);
|
||||||
if (diff == nullptr) {
|
if (diff == nullptr) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -779,6 +885,8 @@ public:
|
|||||||
forward_params.conv2d.d0,
|
forward_params.conv2d.d0,
|
||||||
forward_params.conv2d.d1,
|
forward_params.conv2d.d1,
|
||||||
forward_params.conv2d.direct,
|
forward_params.conv2d.direct,
|
||||||
|
forward_params.conv2d.circular_x,
|
||||||
|
forward_params.conv2d.circular_y,
|
||||||
forward_params.conv2d.scale);
|
forward_params.conv2d.scale);
|
||||||
}
|
}
|
||||||
for (auto& lora_model : lora_models) {
|
for (auto& lora_model : lora_models) {
|
||||||
@ -1,8 +1,7 @@
|
|||||||
#ifndef __LTXV_HPP__
|
#ifndef __LTXV_HPP__
|
||||||
#define __LTXV_HPP__
|
#define __LTXV_HPP__
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "common_block.hpp"
|
||||||
#include "ggml_extend.hpp"
|
|
||||||
|
|
||||||
namespace LTXV {
|
namespace LTXV {
|
||||||
|
|
||||||
@ -27,8 +26,8 @@ namespace LTXV {
|
|||||||
bias));
|
bias));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
bool causal = true) {
|
bool causal = true) {
|
||||||
// x: [N*IC, ID, IH, IW]
|
// x: [N*IC, ID, IH, IW]
|
||||||
// result: [N*OC, OD, OH, OW]
|
// result: [N*OC, OD, OH, OW]
|
||||||
@ -27,13 +27,13 @@ public:
|
|||||||
blocks["fc2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_features, out_features, bias));
|
blocks["fc2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_features, out_features, bias));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, n_token, in_features]
|
// x: [N, n_token, in_features]
|
||||||
auto fc1 = std::dynamic_pointer_cast<Linear>(blocks["fc1"]);
|
auto fc1 = std::dynamic_pointer_cast<Linear>(blocks["fc1"]);
|
||||||
auto fc2 = std::dynamic_pointer_cast<Linear>(blocks["fc2"]);
|
auto fc2 = std::dynamic_pointer_cast<Linear>(blocks["fc2"]);
|
||||||
|
|
||||||
x = fc1->forward(ctx, x);
|
x = fc1->forward(ctx, x);
|
||||||
x = ggml_gelu_inplace(ctx->ggml_ctx, x);
|
x = ggml_ext_gelu(ctx->ggml_ctx, x, true);
|
||||||
x = fc2->forward(ctx, x);
|
x = fc2->forward(ctx, x);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
@ -72,7 +72,7 @@ public:
|
|||||||
bias));
|
bias));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, C, H, W]
|
// x: [N, C, H, W]
|
||||||
// return: [N, H*W, embed_dim]
|
// return: [N, H*W, embed_dim]
|
||||||
auto proj = std::dynamic_pointer_cast<Conv2d>(blocks["proj"]);
|
auto proj = std::dynamic_pointer_cast<Conv2d>(blocks["proj"]);
|
||||||
@ -97,17 +97,21 @@ public:
|
|||||||
struct TimestepEmbedder : public GGMLBlock {
|
struct TimestepEmbedder : public GGMLBlock {
|
||||||
// Embeds scalar timesteps into vector representations.
|
// Embeds scalar timesteps into vector representations.
|
||||||
protected:
|
protected:
|
||||||
int64_t frequency_embedding_size;
|
int frequency_embedding_size;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
TimestepEmbedder(int64_t hidden_size,
|
TimestepEmbedder(int64_t hidden_size,
|
||||||
int64_t frequency_embedding_size = 256)
|
int frequency_embedding_size = 256,
|
||||||
|
int64_t out_channels = 0)
|
||||||
: frequency_embedding_size(frequency_embedding_size) {
|
: frequency_embedding_size(frequency_embedding_size) {
|
||||||
|
if (out_channels <= 0) {
|
||||||
|
out_channels = hidden_size;
|
||||||
|
}
|
||||||
blocks["mlp.0"] = std::shared_ptr<GGMLBlock>(new Linear(frequency_embedding_size, hidden_size, true, true));
|
blocks["mlp.0"] = std::shared_ptr<GGMLBlock>(new Linear(frequency_embedding_size, hidden_size, true, true));
|
||||||
blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size, true, true));
|
blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, out_channels, true, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* t) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* t) {
|
||||||
// t: [N, ]
|
// t: [N, ]
|
||||||
// return: [N, hidden_size]
|
// return: [N, hidden_size]
|
||||||
auto mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["mlp.0"]);
|
auto mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["mlp.0"]);
|
||||||
@ -131,7 +135,7 @@ public:
|
|||||||
blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size, true, true));
|
blocks["mlp.2"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, hidden_size, true, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, input_dim]
|
// x: [N, input_dim]
|
||||||
// return: [N, hidden_size]
|
// return: [N, hidden_size]
|
||||||
auto mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["mlp.0"]);
|
auto mlp_0 = std::dynamic_pointer_cast<Linear>(blocks["mlp.0"]);
|
||||||
@ -163,15 +167,15 @@ public:
|
|||||||
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim));
|
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim, dim));
|
||||||
}
|
}
|
||||||
if (qk_norm == "rms") {
|
if (qk_norm == "rms") {
|
||||||
blocks["ln_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(d_head, 1.0e-6));
|
blocks["ln_q"] = std::shared_ptr<GGMLBlock>(new RMSNorm(d_head, 1.0e-6f));
|
||||||
blocks["ln_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(d_head, 1.0e-6));
|
blocks["ln_k"] = std::shared_ptr<GGMLBlock>(new RMSNorm(d_head, 1.0e-6f));
|
||||||
} else if (qk_norm == "ln") {
|
} else if (qk_norm == "ln") {
|
||||||
blocks["ln_q"] = std::shared_ptr<GGMLBlock>(new LayerNorm(d_head, 1.0e-6));
|
blocks["ln_q"] = std::shared_ptr<GGMLBlock>(new LayerNorm(d_head, 1.0e-6f));
|
||||||
blocks["ln_k"] = std::shared_ptr<GGMLBlock>(new LayerNorm(d_head, 1.0e-6));
|
blocks["ln_k"] = std::shared_ptr<GGMLBlock>(new LayerNorm(d_head, 1.0e-6f));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<struct ggml_tensor*> pre_attention(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
std::vector<ggml_tensor*> pre_attention(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
|
auto qkv_proj = std::dynamic_pointer_cast<Linear>(blocks["qkv"]);
|
||||||
|
|
||||||
auto qkv = qkv_proj->forward(ctx, x);
|
auto qkv = qkv_proj->forward(ctx, x);
|
||||||
@ -194,7 +198,7 @@ public:
|
|||||||
return {q, k, v};
|
return {q, k, v};
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* post_attention(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* post_attention(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
GGML_ASSERT(!pre_only);
|
GGML_ASSERT(!pre_only);
|
||||||
|
|
||||||
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
|
||||||
@ -204,19 +208,19 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// x: [N, n_token, dim]
|
// x: [N, n_token, dim]
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x) {
|
ggml_tensor* x) {
|
||||||
auto qkv = pre_attention(ctx, x);
|
auto qkv = pre_attention(ctx, x);
|
||||||
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
x = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
||||||
x = post_attention(ctx, x); // [N, n_token, dim]
|
x = post_attention(ctx, x); // [N, n_token, dim]
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
__STATIC_INLINE__ struct ggml_tensor* modulate(struct ggml_context* ctx,
|
__STATIC_INLINE__ ggml_tensor* modulate(ggml_context* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* shift,
|
ggml_tensor* shift,
|
||||||
struct ggml_tensor* scale) {
|
ggml_tensor* scale) {
|
||||||
// x: [N, L, C]
|
// x: [N, L, C]
|
||||||
// scale: [N, C]
|
// scale: [N, C]
|
||||||
// shift: [N, C]
|
// shift: [N, C]
|
||||||
@ -270,8 +274,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<std::vector<ggml_tensor*>, std::vector<ggml_tensor*>, std::vector<ggml_tensor*>> pre_attention_x(GGMLRunnerContext* ctx,
|
std::tuple<std::vector<ggml_tensor*>, std::vector<ggml_tensor*>, std::vector<ggml_tensor*>> pre_attention_x(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c) {
|
ggml_tensor* c) {
|
||||||
GGML_ASSERT(self_attn);
|
GGML_ASSERT(self_attn);
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// c: [N, hidden_size]
|
// c: [N, hidden_size]
|
||||||
@ -280,23 +284,19 @@ public:
|
|||||||
auto attn2 = std::dynamic_pointer_cast<SelfAttention>(blocks["attn2"]);
|
auto attn2 = std::dynamic_pointer_cast<SelfAttention>(blocks["attn2"]);
|
||||||
auto adaLN_modulation_1 = std::dynamic_pointer_cast<Linear>(blocks["adaLN_modulation.1"]);
|
auto adaLN_modulation_1 = std::dynamic_pointer_cast<Linear>(blocks["adaLN_modulation.1"]);
|
||||||
|
|
||||||
int64_t n_mods = 9;
|
int n_mods = 9;
|
||||||
auto m = adaLN_modulation_1->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, n_mods * hidden_size]
|
auto m = adaLN_modulation_1->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, n_mods * hidden_size]
|
||||||
m = ggml_reshape_3d(ctx->ggml_ctx, m, c->ne[0], n_mods, c->ne[1]); // [N, n_mods, hidden_size]
|
auto m_vec = ggml_ext_chunk(ctx->ggml_ctx, m, n_mods, 0);
|
||||||
m = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, m, 0, 2, 1, 3)); // [n_mods, N, hidden_size]
|
|
||||||
|
|
||||||
int64_t offset = m->nb[1] * m->ne[1];
|
auto shift_msa = m_vec[0]; // [N, hidden_size]
|
||||||
auto shift_msa = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 0); // [N, hidden_size]
|
auto scale_msa = m_vec[1]; // [N, hidden_size]
|
||||||
auto scale_msa = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 1); // [N, hidden_size]
|
auto gate_msa = m_vec[2]; // [N, hidden_size]
|
||||||
auto gate_msa = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 2); // [N, hidden_size]
|
auto shift_mlp = m_vec[3]; // [N, hidden_size]
|
||||||
|
auto scale_mlp = m_vec[4]; // [N, hidden_size]
|
||||||
auto shift_mlp = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 3); // [N, hidden_size]
|
auto gate_mlp = m_vec[5]; // [N, hidden_size]
|
||||||
auto scale_mlp = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 4); // [N, hidden_size]
|
auto shift_msa2 = m_vec[6]; // [N, hidden_size]
|
||||||
auto gate_mlp = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 5); // [N, hidden_size]
|
auto scale_msa2 = m_vec[7]; // [N, hidden_size]
|
||||||
|
auto gate_msa2 = m_vec[8]; // [N, hidden_size]
|
||||||
auto shift_msa2 = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 6); // [N, hidden_size]
|
|
||||||
auto scale_msa2 = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 7); // [N, hidden_size]
|
|
||||||
auto gate_msa2 = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 8); // [N, hidden_size]
|
|
||||||
|
|
||||||
auto x_norm = norm1->forward(ctx, x);
|
auto x_norm = norm1->forward(ctx, x);
|
||||||
|
|
||||||
@ -309,31 +309,29 @@ public:
|
|||||||
return {qkv, qkv2, {x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2}};
|
return {qkv, qkv2, {x, gate_msa, shift_mlp, scale_mlp, gate_mlp, gate_msa2}};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::vector<struct ggml_tensor*>, std::vector<struct ggml_tensor*>> pre_attention(GGMLRunnerContext* ctx,
|
std::pair<std::vector<ggml_tensor*>, std::vector<ggml_tensor*>> pre_attention(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c) {
|
ggml_tensor* c) {
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// c: [N, hidden_size]
|
// c: [N, hidden_size]
|
||||||
auto norm1 = std::dynamic_pointer_cast<LayerNorm>(blocks["norm1"]);
|
auto norm1 = std::dynamic_pointer_cast<LayerNorm>(blocks["norm1"]);
|
||||||
auto attn = std::dynamic_pointer_cast<SelfAttention>(blocks["attn"]);
|
auto attn = std::dynamic_pointer_cast<SelfAttention>(blocks["attn"]);
|
||||||
auto adaLN_modulation_1 = std::dynamic_pointer_cast<Linear>(blocks["adaLN_modulation.1"]);
|
auto adaLN_modulation_1 = std::dynamic_pointer_cast<Linear>(blocks["adaLN_modulation.1"]);
|
||||||
|
|
||||||
int64_t n_mods = 6;
|
int n_mods = 6;
|
||||||
if (pre_only) {
|
if (pre_only) {
|
||||||
n_mods = 2;
|
n_mods = 2;
|
||||||
}
|
}
|
||||||
auto m = adaLN_modulation_1->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, n_mods * hidden_size]
|
auto m = adaLN_modulation_1->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, n_mods * hidden_size]
|
||||||
m = ggml_reshape_3d(ctx->ggml_ctx, m, c->ne[0], n_mods, c->ne[1]); // [N, n_mods, hidden_size]
|
auto m_vec = ggml_ext_chunk(ctx->ggml_ctx, m, n_mods, 0);
|
||||||
m = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, m, 0, 2, 1, 3)); // [n_mods, N, hidden_size]
|
|
||||||
|
|
||||||
int64_t offset = m->nb[1] * m->ne[1];
|
auto shift_msa = m_vec[0]; // [N, hidden_size]
|
||||||
auto shift_msa = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 0); // [N, hidden_size]
|
auto scale_msa = m_vec[1]; // [N, hidden_size]
|
||||||
auto scale_msa = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 1); // [N, hidden_size]
|
|
||||||
if (!pre_only) {
|
if (!pre_only) {
|
||||||
auto gate_msa = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 2); // [N, hidden_size]
|
auto gate_msa = m_vec[2]; // [N, hidden_size]
|
||||||
auto shift_mlp = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 3); // [N, hidden_size]
|
auto shift_mlp = m_vec[3]; // [N, hidden_size]
|
||||||
auto scale_mlp = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 4); // [N, hidden_size]
|
auto scale_mlp = m_vec[4]; // [N, hidden_size]
|
||||||
auto gate_mlp = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 5); // [N, hidden_size]
|
auto gate_mlp = m_vec[5]; // [N, hidden_size]
|
||||||
|
|
||||||
auto attn_in = modulate(ctx->ggml_ctx, norm1->forward(ctx, x), shift_msa, scale_msa);
|
auto attn_in = modulate(ctx->ggml_ctx, norm1->forward(ctx, x), shift_msa, scale_msa);
|
||||||
|
|
||||||
@ -348,15 +346,15 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* post_attention_x(GGMLRunnerContext* ctx,
|
ggml_tensor* post_attention_x(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* attn_out,
|
ggml_tensor* attn_out,
|
||||||
struct ggml_tensor* attn2_out,
|
ggml_tensor* attn2_out,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* gate_msa,
|
ggml_tensor* gate_msa,
|
||||||
struct ggml_tensor* shift_mlp,
|
ggml_tensor* shift_mlp,
|
||||||
struct ggml_tensor* scale_mlp,
|
ggml_tensor* scale_mlp,
|
||||||
struct ggml_tensor* gate_mlp,
|
ggml_tensor* gate_mlp,
|
||||||
struct ggml_tensor* gate_msa2) {
|
ggml_tensor* gate_msa2) {
|
||||||
// attn_out: [N, n_token, hidden_size]
|
// attn_out: [N, n_token, hidden_size]
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// gate_msa: [N, hidden_size]
|
// gate_msa: [N, hidden_size]
|
||||||
@ -386,13 +384,13 @@ public:
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* post_attention(GGMLRunnerContext* ctx,
|
ggml_tensor* post_attention(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* attn_out,
|
ggml_tensor* attn_out,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* gate_msa,
|
ggml_tensor* gate_msa,
|
||||||
struct ggml_tensor* shift_mlp,
|
ggml_tensor* shift_mlp,
|
||||||
struct ggml_tensor* scale_mlp,
|
ggml_tensor* scale_mlp,
|
||||||
struct ggml_tensor* gate_mlp) {
|
ggml_tensor* gate_mlp) {
|
||||||
// attn_out: [N, n_token, hidden_size]
|
// attn_out: [N, n_token, hidden_size]
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// gate_msa: [N, hidden_size]
|
// gate_msa: [N, hidden_size]
|
||||||
@ -418,9 +416,9 @@ public:
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c) {
|
ggml_tensor* c) {
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// c: [N, hidden_size]
|
// c: [N, hidden_size]
|
||||||
// return: [N, n_token, hidden_size]
|
// return: [N, n_token, hidden_size]
|
||||||
@ -435,8 +433,8 @@ public:
|
|||||||
auto qkv2 = std::get<1>(qkv_intermediates);
|
auto qkv2 = std::get<1>(qkv_intermediates);
|
||||||
auto intermediates = std::get<2>(qkv_intermediates);
|
auto intermediates = std::get<2>(qkv_intermediates);
|
||||||
|
|
||||||
auto attn_out = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
auto attn_out = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
||||||
auto attn2_out = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv2[0], qkv2[1], qkv2[2], num_heads, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
auto attn2_out = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv2[0], qkv2[1], qkv2[2], num_heads, nullptr, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
||||||
x = post_attention_x(ctx,
|
x = post_attention_x(ctx,
|
||||||
attn_out,
|
attn_out,
|
||||||
attn2_out,
|
attn2_out,
|
||||||
@ -452,7 +450,7 @@ public:
|
|||||||
auto qkv = qkv_intermediates.first;
|
auto qkv = qkv_intermediates.first;
|
||||||
auto intermediates = qkv_intermediates.second;
|
auto intermediates = qkv_intermediates.second;
|
||||||
|
|
||||||
auto attn_out = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
auto attn_out = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], num_heads, nullptr, false, ctx->flash_attn_enabled); // [N, n_token, dim]
|
||||||
x = post_attention(ctx,
|
x = post_attention(ctx,
|
||||||
attn_out,
|
attn_out,
|
||||||
intermediates[0],
|
intermediates[0],
|
||||||
@ -465,11 +463,11 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
__STATIC_INLINE__ std::pair<struct ggml_tensor*, struct ggml_tensor*>
|
__STATIC_INLINE__ std::pair<ggml_tensor*, ggml_tensor*>
|
||||||
block_mixing(GGMLRunnerContext* ctx,
|
block_mixing(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c,
|
ggml_tensor* c,
|
||||||
std::shared_ptr<DismantledBlock> context_block,
|
std::shared_ptr<DismantledBlock> context_block,
|
||||||
std::shared_ptr<DismantledBlock> x_block) {
|
std::shared_ptr<DismantledBlock> x_block) {
|
||||||
// context: [N, n_context, hidden_size]
|
// context: [N, n_context, hidden_size]
|
||||||
@ -491,31 +489,29 @@ block_mixing(GGMLRunnerContext* ctx,
|
|||||||
x_qkv = x_qkv_intermediates.first;
|
x_qkv = x_qkv_intermediates.first;
|
||||||
x_intermediates = x_qkv_intermediates.second;
|
x_intermediates = x_qkv_intermediates.second;
|
||||||
}
|
}
|
||||||
std::vector<struct ggml_tensor*> qkv;
|
std::vector<ggml_tensor*> qkv;
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
qkv.push_back(ggml_concat(ctx->ggml_ctx, context_qkv[i], x_qkv[i], 1));
|
qkv.push_back(ggml_concat(ctx->ggml_ctx, context_qkv[i], x_qkv[i], 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto attn = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], x_block->num_heads, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_context + n_token, hidden_size]
|
auto attn = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, qkv[0], qkv[1], qkv[2], x_block->num_heads, nullptr, false, ctx->flash_attn_enabled); // [N, n_context + n_token, hidden_size]
|
||||||
attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, attn, 0, 2, 1, 3)); // [n_context + n_token, N, hidden_size]
|
|
||||||
auto context_attn = ggml_view_3d(ctx->ggml_ctx,
|
auto context_attn = ggml_view_3d(ctx->ggml_ctx,
|
||||||
attn,
|
attn,
|
||||||
attn->ne[0],
|
attn->ne[0],
|
||||||
attn->ne[1],
|
|
||||||
context->ne[1],
|
context->ne[1],
|
||||||
|
attn->ne[2],
|
||||||
attn->nb[1],
|
attn->nb[1],
|
||||||
attn->nb[2],
|
attn->nb[2],
|
||||||
0); // [n_context, N, hidden_size]
|
0); // [N, n_context, hidden_size]
|
||||||
context_attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, context_attn, 0, 2, 1, 3)); // [N, n_context, hidden_size]
|
|
||||||
auto x_attn = ggml_view_3d(ctx->ggml_ctx,
|
auto x_attn = ggml_view_3d(ctx->ggml_ctx,
|
||||||
attn,
|
attn,
|
||||||
attn->ne[0],
|
attn->ne[0],
|
||||||
attn->ne[1],
|
|
||||||
x->ne[1],
|
x->ne[1],
|
||||||
|
attn->ne[2],
|
||||||
attn->nb[1],
|
attn->nb[1],
|
||||||
attn->nb[2],
|
attn->nb[2],
|
||||||
attn->nb[2] * context->ne[1]); // [n_token, N, hidden_size]
|
context->ne[1] * attn->nb[1]); // [N, n_token, hidden_size]
|
||||||
x_attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, x_attn, 0, 2, 1, 3)); // [N, n_token, hidden_size]
|
|
||||||
|
|
||||||
if (!context_block->pre_only) {
|
if (!context_block->pre_only) {
|
||||||
context = context_block->post_attention(ctx,
|
context = context_block->post_attention(ctx,
|
||||||
@ -530,7 +526,7 @@ block_mixing(GGMLRunnerContext* ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (x_block->self_attn) {
|
if (x_block->self_attn) {
|
||||||
auto attn2 = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, x_qkv2[0], x_qkv2[1], x_qkv2[2], x_block->num_heads, nullptr, false, false, ctx->flash_attn_enabled); // [N, n_token, hidden_size]
|
auto attn2 = ggml_ext_attention_ext(ctx->ggml_ctx, ctx->backend, x_qkv2[0], x_qkv2[1], x_qkv2[2], x_block->num_heads, nullptr, false, ctx->flash_attn_enabled); // [N, n_token, hidden_size]
|
||||||
|
|
||||||
x = x_block->post_attention_x(ctx,
|
x = x_block->post_attention_x(ctx,
|
||||||
x_attn,
|
x_attn,
|
||||||
@ -567,10 +563,10 @@ public:
|
|||||||
blocks["x_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, false, self_attn_x));
|
blocks["x_block"] = std::shared_ptr<GGMLBlock>(new DismantledBlock(hidden_size, num_heads, mlp_ratio, qk_norm, qkv_bias, false, self_attn_x));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<struct ggml_tensor*, struct ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
std::pair<ggml_tensor*, ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c) {
|
ggml_tensor* c) {
|
||||||
auto context_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["context_block"]);
|
auto context_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["context_block"]);
|
||||||
auto x_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["x_block"]);
|
auto x_block = std::dynamic_pointer_cast<DismantledBlock>(blocks["x_block"]);
|
||||||
|
|
||||||
@ -590,9 +586,9 @@ public:
|
|||||||
blocks["adaLN_modulation.1"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, 2 * hidden_size));
|
blocks["adaLN_modulation.1"] = std::shared_ptr<GGMLBlock>(new Linear(hidden_size, 2 * hidden_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c) {
|
ggml_tensor* c) {
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// c: [N, hidden_size]
|
// c: [N, hidden_size]
|
||||||
// return: [N, n_token, patch_size * patch_size * out_channels]
|
// return: [N, n_token, patch_size * patch_size * out_channels]
|
||||||
@ -601,12 +597,9 @@ public:
|
|||||||
auto adaLN_modulation_1 = std::dynamic_pointer_cast<Linear>(blocks["adaLN_modulation.1"]);
|
auto adaLN_modulation_1 = std::dynamic_pointer_cast<Linear>(blocks["adaLN_modulation.1"]);
|
||||||
|
|
||||||
auto m = adaLN_modulation_1->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, 2 * hidden_size]
|
auto m = adaLN_modulation_1->forward(ctx, ggml_silu(ctx->ggml_ctx, c)); // [N, 2 * hidden_size]
|
||||||
m = ggml_reshape_3d(ctx->ggml_ctx, m, c->ne[0], 2, c->ne[1]); // [N, 2, hidden_size]
|
auto m_vec = ggml_ext_chunk(ctx->ggml_ctx, m, 2, 0);
|
||||||
m = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, m, 0, 2, 1, 3)); // [2, N, hidden_size]
|
auto shift = m_vec[0]; // [N, hidden_size]
|
||||||
|
auto scale = m_vec[1]; // [N, hidden_size]
|
||||||
int64_t offset = m->nb[1] * m->ne[1];
|
|
||||||
auto shift = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 0); // [N, hidden_size]
|
|
||||||
auto scale = ggml_view_2d(ctx->ggml_ctx, m, m->ne[0], m->ne[1], m->nb[1], offset * 1); // [N, hidden_size]
|
|
||||||
|
|
||||||
x = modulate(ctx->ggml_ctx, norm_final->forward(ctx, x), shift, scale);
|
x = modulate(ctx->ggml_ctx, norm_final->forward(ctx, x), shift, scale);
|
||||||
x = linear->forward(ctx, x);
|
x = linear->forward(ctx, x);
|
||||||
@ -619,7 +612,7 @@ struct MMDiT : public GGMLBlock {
|
|||||||
// Diffusion model with a Transformer backbone.
|
// Diffusion model with a Transformer backbone.
|
||||||
protected:
|
protected:
|
||||||
int64_t input_size = -1;
|
int64_t input_size = -1;
|
||||||
int64_t patch_size = 2;
|
int patch_size = 2;
|
||||||
int64_t in_channels = 16;
|
int64_t in_channels = 16;
|
||||||
int64_t d_self = -1; // >=0 for MMdiT-X
|
int64_t d_self = -1; // >=0 for MMdiT-X
|
||||||
int64_t depth = 24;
|
int64_t depth = 24;
|
||||||
@ -633,7 +626,7 @@ protected:
|
|||||||
int64_t hidden_size;
|
int64_t hidden_size;
|
||||||
std::string qk_norm;
|
std::string qk_norm;
|
||||||
|
|
||||||
void init_params(struct ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, std::string prefix = "") override {
|
void init_params(ggml_context* ctx, const String2TensorStorage& tensor_storage_map = {}, std::string prefix = "") override {
|
||||||
enum ggml_type wtype = GGML_TYPE_F32;
|
enum ggml_type wtype = GGML_TYPE_F32;
|
||||||
params["pos_embed"] = ggml_new_tensor_3d(ctx, wtype, hidden_size, num_patchs, 1);
|
params["pos_embed"] = ggml_new_tensor_3d(ctx, wtype, hidden_size, num_patchs, 1);
|
||||||
}
|
}
|
||||||
@ -712,8 +705,8 @@ public:
|
|||||||
blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new FinalLayer(hidden_size, patch_size, out_channels));
|
blocks["final_layer"] = std::shared_ptr<GGMLBlock>(new FinalLayer(hidden_size, patch_size, out_channels));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor*
|
ggml_tensor*
|
||||||
cropped_pos_embed(struct ggml_context* ctx,
|
cropped_pos_embed(ggml_context* ctx,
|
||||||
int64_t h,
|
int64_t h,
|
||||||
int64_t w) {
|
int64_t w) {
|
||||||
auto pos_embed = params["pos_embed"];
|
auto pos_embed = params["pos_embed"];
|
||||||
@ -752,32 +745,10 @@ public:
|
|||||||
return spatial_pos_embed;
|
return spatial_pos_embed;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* unpatchify(struct ggml_context* ctx,
|
ggml_tensor* forward_core_with_concat(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
int64_t h,
|
ggml_tensor* c_mod,
|
||||||
int64_t w) {
|
ggml_tensor* context,
|
||||||
// x: [N, H*W, patch_size * patch_size * C]
|
|
||||||
// return: [N, C, H, W]
|
|
||||||
int64_t n = x->ne[2];
|
|
||||||
int64_t c = out_channels;
|
|
||||||
int64_t p = patch_size;
|
|
||||||
h = (h + 1) / p;
|
|
||||||
w = (w + 1) / p;
|
|
||||||
|
|
||||||
GGML_ASSERT(h * w == x->ne[1]);
|
|
||||||
|
|
||||||
x = ggml_reshape_4d(ctx, x, c, p * p, w * h, n); // [N, H*W, P*P, C]
|
|
||||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 2, 0, 1, 3)); // [N, C, H*W, P*P]
|
|
||||||
x = ggml_reshape_4d(ctx, x, p, p, w, h * c * n); // [N*C*H, W, P, P]
|
|
||||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N*C*H, P, W, P]
|
|
||||||
x = ggml_reshape_4d(ctx, x, p * w, p * h, c, n); // [N, C, H*P, W*P]
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_tensor* forward_core_with_concat(GGMLRunnerContext* ctx,
|
|
||||||
struct ggml_tensor* x,
|
|
||||||
struct ggml_tensor* c_mod,
|
|
||||||
struct ggml_tensor* context,
|
|
||||||
std::vector<int> skip_layers = std::vector<int>()) {
|
std::vector<int> skip_layers = std::vector<int>()) {
|
||||||
// x: [N, H*W, hidden_size]
|
// x: [N, H*W, hidden_size]
|
||||||
// context: [N, n_context, d_context]
|
// context: [N, n_context, d_context]
|
||||||
@ -803,11 +774,11 @@ public:
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* t,
|
ggml_tensor* t,
|
||||||
struct ggml_tensor* y = nullptr,
|
ggml_tensor* y = nullptr,
|
||||||
struct ggml_tensor* context = nullptr,
|
ggml_tensor* context = nullptr,
|
||||||
std::vector<int> skip_layers = std::vector<int>()) {
|
std::vector<int> skip_layers = std::vector<int>()) {
|
||||||
// Forward pass of DiT.
|
// Forward pass of DiT.
|
||||||
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
|
// x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
|
||||||
@ -818,11 +789,11 @@ public:
|
|||||||
auto x_embedder = std::dynamic_pointer_cast<PatchEmbed>(blocks["x_embedder"]);
|
auto x_embedder = std::dynamic_pointer_cast<PatchEmbed>(blocks["x_embedder"]);
|
||||||
auto t_embedder = std::dynamic_pointer_cast<TimestepEmbedder>(blocks["t_embedder"]);
|
auto t_embedder = std::dynamic_pointer_cast<TimestepEmbedder>(blocks["t_embedder"]);
|
||||||
|
|
||||||
int64_t w = x->ne[0];
|
int64_t W = x->ne[0];
|
||||||
int64_t h = x->ne[1];
|
int64_t H = x->ne[1];
|
||||||
|
|
||||||
auto patch_embed = x_embedder->forward(ctx, x); // [N, H*W, hidden_size]
|
auto patch_embed = x_embedder->forward(ctx, x); // [N, H*W, hidden_size]
|
||||||
auto pos_embed = cropped_pos_embed(ctx->ggml_ctx, h, w); // [1, H*W, hidden_size]
|
auto pos_embed = cropped_pos_embed(ctx->ggml_ctx, H, W); // [1, H*W, hidden_size]
|
||||||
x = ggml_add(ctx->ggml_ctx, patch_embed, pos_embed); // [N, H*W, hidden_size]
|
x = ggml_add(ctx->ggml_ctx, patch_embed, pos_embed); // [N, H*W, hidden_size]
|
||||||
|
|
||||||
auto c = t_embedder->forward(ctx, t); // [N, hidden_size]
|
auto c = t_embedder->forward(ctx, t); // [N, hidden_size]
|
||||||
@ -841,7 +812,7 @@ public:
|
|||||||
|
|
||||||
x = forward_core_with_concat(ctx, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels)
|
x = forward_core_with_concat(ctx, x, c, context, skip_layers); // (N, H*W, patch_size ** 2 * out_channels)
|
||||||
|
|
||||||
x = unpatchify(ctx->ggml_ctx, x, h, w); // [N, C, H, W]
|
x = DiT::unpatchify_and_crop(ctx->ggml_ctx, x, H, W, patch_size, patch_size, /*patch_last*/ false); // [N, C, H, W]
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
@ -861,16 +832,16 @@ struct MMDiTRunner : public GGMLRunner {
|
|||||||
return "mmdit";
|
return "mmdit";
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
mmdit.get_param_tensors(tensors, prefix);
|
mmdit.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph(struct ggml_tensor* x,
|
ggml_cgraph* build_graph(ggml_tensor* x,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* y,
|
ggml_tensor* y,
|
||||||
std::vector<int> skip_layers = std::vector<int>()) {
|
std::vector<int> skip_layers = std::vector<int>()) {
|
||||||
struct ggml_cgraph* gf = new_graph_custom(MMDIT_GRAPH_SIZE);
|
ggml_cgraph* gf = new_graph_custom(MMDIT_GRAPH_SIZE);
|
||||||
|
|
||||||
x = to_backend(x);
|
x = to_backend(x);
|
||||||
context = to_backend(context);
|
context = to_backend(context);
|
||||||
@ -878,7 +849,7 @@ struct MMDiTRunner : public GGMLRunner {
|
|||||||
timesteps = to_backend(timesteps);
|
timesteps = to_backend(timesteps);
|
||||||
|
|
||||||
auto runner_ctx = get_context();
|
auto runner_ctx = get_context();
|
||||||
struct ggml_tensor* out = mmdit.forward(&runner_ctx,
|
ggml_tensor* out = mmdit.forward(&runner_ctx,
|
||||||
x,
|
x,
|
||||||
timesteps,
|
timesteps,
|
||||||
y,
|
y,
|
||||||
@ -890,32 +861,32 @@ struct MMDiTRunner : public GGMLRunner {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
bool compute(int n_threads,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* y,
|
ggml_tensor* y,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr,
|
ggml_context* output_ctx = nullptr,
|
||||||
std::vector<int> skip_layers = std::vector<int>()) {
|
std::vector<int> skip_layers = std::vector<int>()) {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
// timesteps: [N, ]
|
// timesteps: [N, ]
|
||||||
// context: [N, max_position, hidden_size]([N, 154, 4096]) or [1, max_position, hidden_size]
|
// context: [N, max_position, hidden_size]([N, 154, 4096]) or [1, max_position, hidden_size]
|
||||||
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
// y: [N, adm_in_channels] or [1, adm_in_channels]
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_graph(x, timesteps, context, y, skip_layers);
|
return build_graph(x, timesteps, context, y, skip_layers);
|
||||||
};
|
};
|
||||||
|
|
||||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test() {
|
void test() {
|
||||||
struct ggml_init_params params;
|
ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10 MB
|
||||||
params.mem_buffer = nullptr;
|
params.mem_buffer = nullptr;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
|
|
||||||
struct ggml_context* work_ctx = ggml_init(params);
|
ggml_context* work_ctx = ggml_init(params);
|
||||||
GGML_ASSERT(work_ctx != nullptr);
|
GGML_ASSERT(work_ctx != nullptr);
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -937,14 +908,14 @@ struct MMDiTRunner : public GGMLRunner {
|
|||||||
ggml_set_f32(y, 0.01f);
|
ggml_set_f32(y, 0.01f);
|
||||||
// print_ggml_tensor(y);
|
// print_ggml_tensor(y);
|
||||||
|
|
||||||
struct ggml_tensor* out = nullptr;
|
ggml_tensor* out = nullptr;
|
||||||
|
|
||||||
int t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
compute(8, x, timesteps, context, y, &out, work_ctx);
|
compute(8, x, timesteps, context, y, &out, work_ctx);
|
||||||
int t1 = ggml_time_ms();
|
int64_t t1 = ggml_time_ms();
|
||||||
|
|
||||||
print_ggml_tensor(out);
|
print_ggml_tensor(out);
|
||||||
LOG_DEBUG("mmdit test done in %dms", t1 - t0);
|
LOG_DEBUG("mmdit test done in %lldms", t1 - t0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -16,9 +16,6 @@
|
|||||||
#include "model.h"
|
#include "model.h"
|
||||||
#include "stable-diffusion.h"
|
#include "stable-diffusion.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "vocab.hpp"
|
|
||||||
#include "vocab_qwen.hpp"
|
|
||||||
#include "vocab_umt5.hpp"
|
|
||||||
|
|
||||||
#include "ggml-alloc.h"
|
#include "ggml-alloc.h"
|
||||||
#include "ggml-backend.h"
|
#include "ggml-backend.h"
|
||||||
@ -102,10 +99,15 @@ const char* unused_tensors[] = {
|
|||||||
"model_ema.diffusion_model",
|
"model_ema.diffusion_model",
|
||||||
"embedding_manager",
|
"embedding_manager",
|
||||||
"denoiser.sigmas",
|
"denoiser.sigmas",
|
||||||
"edm_vpred.sigma_max",
|
|
||||||
"text_encoders.t5xxl.transformer.encoder.embed_tokens.weight", // only used during training
|
"text_encoders.t5xxl.transformer.encoder.embed_tokens.weight", // only used during training
|
||||||
"text_encoders.qwen2vl.output.weight",
|
"ztsnr", // Found in some SDXL vpred models
|
||||||
"text_encoders.qwen2vl.lm_head.",
|
"edm_vpred.sigma_min", // Found in CosXL
|
||||||
|
// TODO: find another way to avoid the "unknown tensor" for these two
|
||||||
|
// "edm_vpred.sigma_max", // Used to detect CosXL
|
||||||
|
// "v_pred", // Used to detect SDXL vpred models
|
||||||
|
"text_encoders.llm.output.weight",
|
||||||
|
"text_encoders.llm.lm_head.",
|
||||||
|
"first_stage_model.bn.",
|
||||||
};
|
};
|
||||||
|
|
||||||
bool is_unused_tensor(std::string name) {
|
bool is_unused_tensor(std::string name) {
|
||||||
@ -117,11 +119,6 @@ bool is_unused_tensor(std::string name) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
float bf16_to_f32(uint16_t bfloat16) {
|
|
||||||
uint32_t val_bits = (static_cast<uint32_t>(bfloat16) << 16);
|
|
||||||
return *reinterpret_cast<float*>(&val_bits);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint16_t f8_e4m3_to_f16(uint8_t f8) {
|
uint16_t f8_e4m3_to_f16(uint8_t f8) {
|
||||||
// do we need to support uz?
|
// do we need to support uz?
|
||||||
|
|
||||||
@ -204,13 +201,6 @@ uint16_t f8_e5m2_to_f16(uint8_t fp8) {
|
|||||||
return fp16_sign | (fp16_exponent << 10) | fp16_mantissa;
|
return fp16_sign | (fp16_exponent << 10) | fp16_mantissa;
|
||||||
}
|
}
|
||||||
|
|
||||||
void bf16_to_f32_vec(uint16_t* src, float* dst, int64_t n) {
|
|
||||||
// support inplace op
|
|
||||||
for (int64_t i = n - 1; i >= 0; i--) {
|
|
||||||
dst[i] = bf16_to_f32(src[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void f8_e4m3_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
|
void f8_e4m3_to_f16_vec(uint8_t* src, uint16_t* dst, int64_t n) {
|
||||||
// support inplace op
|
// support inplace op
|
||||||
for (int64_t i = n - 1; i >= 0; i--) {
|
for (int64_t i = n - 1; i >= 0; i--) {
|
||||||
@ -263,7 +253,7 @@ void convert_tensor(void* src,
|
|||||||
} else {
|
} else {
|
||||||
auto qtype = ggml_get_type_traits(src_type);
|
auto qtype = ggml_get_type_traits(src_type);
|
||||||
if (qtype->to_float == nullptr) {
|
if (qtype->to_float == nullptr) {
|
||||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
|
throw std::runtime_error(sd_format("type %s unsupported for integer quantization: no dequantization available",
|
||||||
ggml_type_name(src_type)));
|
ggml_type_name(src_type)));
|
||||||
}
|
}
|
||||||
qtype->to_float(src, (float*)dst, n);
|
qtype->to_float(src, (float*)dst, n);
|
||||||
@ -273,7 +263,7 @@ void convert_tensor(void* src,
|
|||||||
// src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
|
// src_type is quantized => dst_type == GGML_TYPE_F16 or dst_type is quantized
|
||||||
auto qtype = ggml_get_type_traits(src_type);
|
auto qtype = ggml_get_type_traits(src_type);
|
||||||
if (qtype->to_float == nullptr) {
|
if (qtype->to_float == nullptr) {
|
||||||
throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
|
throw std::runtime_error(sd_format("type %s unsupported for integer quantization: no dequantization available",
|
||||||
ggml_type_name(src_type)));
|
ggml_type_name(src_type)));
|
||||||
}
|
}
|
||||||
std::vector<char> buf;
|
std::vector<char> buf;
|
||||||
@ -297,7 +287,7 @@ void ModelLoader::add_tensor_storage(const TensorStorage& tensor_storage) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool is_zip_file(const std::string& file_path) {
|
bool is_zip_file(const std::string& file_path) {
|
||||||
struct zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
|
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
|
||||||
if (zip == nullptr) {
|
if (zip == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -382,7 +372,11 @@ bool ModelLoader::init_from_file(const std::string& file_path, const std::string
|
|||||||
LOG_INFO("load %s using checkpoint format", file_path.c_str());
|
LOG_INFO("load %s using checkpoint format", file_path.c_str());
|
||||||
return init_from_ckpt_file(file_path, prefix);
|
return init_from_ckpt_file(file_path, prefix);
|
||||||
} else {
|
} else {
|
||||||
|
if (file_exists(file_path)) {
|
||||||
LOG_WARN("unknown format %s", file_path.c_str());
|
LOG_WARN("unknown format %s", file_path.c_str());
|
||||||
|
} else {
|
||||||
|
LOG_WARN("file %s not found", file_path.c_str());
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -442,7 +436,7 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
|
|||||||
name,
|
name,
|
||||||
gguf_tensor_info.type,
|
gguf_tensor_info.type,
|
||||||
gguf_tensor_info.shape.data(),
|
gguf_tensor_info.shape.data(),
|
||||||
gguf_tensor_info.shape.size(),
|
static_cast<int>(gguf_tensor_info.shape.size()),
|
||||||
file_index,
|
file_index,
|
||||||
data_offset + gguf_tensor_info.offset);
|
data_offset + gguf_tensor_info.offset);
|
||||||
|
|
||||||
@ -454,13 +448,13 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int n_tensors = gguf_get_n_tensors(ctx_gguf_);
|
int n_tensors = static_cast<int>(gguf_get_n_tensors(ctx_gguf_));
|
||||||
|
|
||||||
size_t total_size = 0;
|
size_t total_size = 0;
|
||||||
size_t data_offset = gguf_get_data_offset(ctx_gguf_);
|
size_t data_offset = gguf_get_data_offset(ctx_gguf_);
|
||||||
for (int i = 0; i < n_tensors; i++) {
|
for (int i = 0; i < n_tensors; i++) {
|
||||||
std::string name = gguf_get_tensor_name(ctx_gguf_, i);
|
std::string name = gguf_get_tensor_name(ctx_gguf_, i);
|
||||||
struct ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
|
ggml_tensor* dummy = ggml_get_tensor(ctx_meta_, name.c_str());
|
||||||
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
|
size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
|
||||||
|
|
||||||
// LOG_DEBUG("%s", name.c_str());
|
// LOG_DEBUG("%s", name.c_str());
|
||||||
@ -489,7 +483,7 @@ ggml_type str_to_ggml_type(const std::string& dtype) {
|
|||||||
if (dtype == "F16") {
|
if (dtype == "F16") {
|
||||||
ttype = GGML_TYPE_F16;
|
ttype = GGML_TYPE_F16;
|
||||||
} else if (dtype == "BF16") {
|
} else if (dtype == "BF16") {
|
||||||
ttype = GGML_TYPE_F32;
|
ttype = GGML_TYPE_BF16;
|
||||||
} else if (dtype == "F32") {
|
} else if (dtype == "F32") {
|
||||||
ttype = GGML_TYPE_F32;
|
ttype = GGML_TYPE_F32;
|
||||||
} else if (dtype == "F64") {
|
} else if (dtype == "F64") {
|
||||||
@ -617,10 +611,7 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
|
|||||||
|
|
||||||
size_t tensor_data_size = end - begin;
|
size_t tensor_data_size = end - begin;
|
||||||
|
|
||||||
if (dtype == "BF16") {
|
if (dtype == "F8_E4M3") {
|
||||||
tensor_storage.is_bf16 = true;
|
|
||||||
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
|
|
||||||
} else if (dtype == "F8_E4M3") {
|
|
||||||
tensor_storage.is_f8_e4m3 = true;
|
tensor_storage.is_f8_e4m3 = true;
|
||||||
// f8 -> f16
|
// f8 -> f16
|
||||||
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
|
GGML_ASSERT(tensor_storage.nbytes() == tensor_data_size * 2);
|
||||||
@ -821,7 +812,7 @@ struct PickleTensorReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void read_string(const std::string& str, struct zip_t* zip, std::string dir) {
|
void read_string(const std::string& str, zip_t* zip, std::string dir) {
|
||||||
if (str == "storage") {
|
if (str == "storage") {
|
||||||
read_global_type = true;
|
read_global_type = true;
|
||||||
} else if (str != "state_dict") {
|
} else if (str != "state_dict") {
|
||||||
@ -1004,7 +995,7 @@ bool ModelLoader::init_from_ckpt_file(const std::string& file_path, const std::s
|
|||||||
file_paths_.push_back(file_path);
|
file_paths_.push_back(file_path);
|
||||||
size_t file_index = file_paths_.size() - 1;
|
size_t file_index = file_paths_.size() - 1;
|
||||||
|
|
||||||
struct zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
|
zip_t* zip = zip_open(file_path.c_str(), 0, 'r');
|
||||||
if (zip == nullptr) {
|
if (zip == nullptr) {
|
||||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||||
return false;
|
return false;
|
||||||
@ -1043,10 +1034,14 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
|
|
||||||
bool is_xl = false;
|
bool is_xl = false;
|
||||||
bool is_flux = false;
|
bool is_flux = false;
|
||||||
|
bool is_flux2 = false;
|
||||||
|
bool has_single_block_47 = false;
|
||||||
bool is_wan = false;
|
bool is_wan = false;
|
||||||
int64_t patch_embedding_channels = 0;
|
int64_t patch_embedding_channels = 0;
|
||||||
bool has_img_emb = false;
|
bool has_img_emb = false;
|
||||||
bool has_middle_block_1 = false;
|
bool has_middle_block_1 = false;
|
||||||
|
bool has_output_block_311 = false;
|
||||||
|
bool has_output_block_71 = false;
|
||||||
|
|
||||||
for (auto& [name, tensor_storage] : tensor_storage_map) {
|
for (auto& [name, tensor_storage] : tensor_storage_map) {
|
||||||
if (!(is_xl)) {
|
if (!(is_xl)) {
|
||||||
@ -1062,6 +1057,21 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
if (tensor_storage.name.find("model.diffusion_model.transformer_blocks.0.img_mod.1.weight") != std::string::npos) {
|
if (tensor_storage.name.find("model.diffusion_model.transformer_blocks.0.img_mod.1.weight") != std::string::npos) {
|
||||||
return VERSION_QWEN_IMAGE;
|
return VERSION_QWEN_IMAGE;
|
||||||
}
|
}
|
||||||
|
if (tensor_storage.name.find("llm_adapter.blocks.0.cross_attn.q_proj.weight") != std::string::npos) {
|
||||||
|
return VERSION_ANIMA;
|
||||||
|
}
|
||||||
|
if (tensor_storage.name.find("model.diffusion_model.double_stream_modulation_img.lin.weight") != std::string::npos) {
|
||||||
|
is_flux2 = true;
|
||||||
|
}
|
||||||
|
if (tensor_storage.name.find("single_blocks.47.linear1.weight") != std::string::npos) {
|
||||||
|
has_single_block_47 = true;
|
||||||
|
}
|
||||||
|
if (tensor_storage.name.find("model.diffusion_model.double_blocks.0.img_mlp.gate_proj.weight") != std::string::npos) {
|
||||||
|
return VERSION_OVIS_IMAGE;
|
||||||
|
}
|
||||||
|
if (tensor_storage.name.find("model.diffusion_model.cap_embedder.0.weight") != std::string::npos) {
|
||||||
|
return VERSION_Z_IMAGE;
|
||||||
|
}
|
||||||
if (tensor_storage.name.find("model.diffusion_model.blocks.0.cross_attn.norm_k.weight") != std::string::npos) {
|
if (tensor_storage.name.find("model.diffusion_model.blocks.0.cross_attn.norm_k.weight") != std::string::npos) {
|
||||||
is_wan = true;
|
is_wan = true;
|
||||||
}
|
}
|
||||||
@ -1094,6 +1104,14 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {
|
tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {
|
||||||
has_middle_block_1 = true;
|
has_middle_block_1 = true;
|
||||||
}
|
}
|
||||||
|
if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos ||
|
||||||
|
tensor_storage.name.find("unet.up_blocks.1.attentions.0.transformer_blocks.1") != std::string::npos) {
|
||||||
|
has_output_block_311 = true;
|
||||||
|
}
|
||||||
|
if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos ||
|
||||||
|
tensor_storage.name.find("unet.up_blocks.2.attentions.1") != std::string::npos) {
|
||||||
|
has_output_block_71 = true;
|
||||||
|
}
|
||||||
if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
|
if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
|
||||||
tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
|
tensor_storage.name == "cond_stage_model.model.token_embedding.weight" ||
|
||||||
tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
|
tensor_storage.name == "text_model.embeddings.token_embedding.weight" ||
|
||||||
@ -1129,12 +1147,15 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
return VERSION_SDXL_PIX2PIX;
|
return VERSION_SDXL_PIX2PIX;
|
||||||
}
|
}
|
||||||
if (!has_middle_block_1) {
|
if (!has_middle_block_1) {
|
||||||
|
if (!has_output_block_311) {
|
||||||
|
return VERSION_SDXL_VEGA;
|
||||||
|
}
|
||||||
return VERSION_SDXL_SSD1B;
|
return VERSION_SDXL_SSD1B;
|
||||||
}
|
}
|
||||||
return VERSION_SDXL;
|
return VERSION_SDXL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_flux) {
|
if (is_flux && !is_flux2) {
|
||||||
if (input_block_weight.ne[0] == 384) {
|
if (input_block_weight.ne[0] == 384) {
|
||||||
return VERSION_FLUX_FILL;
|
return VERSION_FLUX_FILL;
|
||||||
}
|
}
|
||||||
@ -1147,6 +1168,13 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
return VERSION_FLUX;
|
return VERSION_FLUX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_flux2) {
|
||||||
|
if (has_single_block_47) {
|
||||||
|
return VERSION_FLUX2;
|
||||||
|
}
|
||||||
|
return VERSION_FLUX2_KLEIN;
|
||||||
|
}
|
||||||
|
|
||||||
if (token_embedding_weight.ne[0] == 768) {
|
if (token_embedding_weight.ne[0] == 768) {
|
||||||
if (is_inpaint) {
|
if (is_inpaint) {
|
||||||
return VERSION_SD1_INPAINT;
|
return VERSION_SD1_INPAINT;
|
||||||
@ -1155,6 +1183,9 @@ SDVersion ModelLoader::get_sd_version() {
|
|||||||
return VERSION_SD1_PIX2PIX;
|
return VERSION_SD1_PIX2PIX;
|
||||||
}
|
}
|
||||||
if (!has_middle_block_1) {
|
if (!has_middle_block_1) {
|
||||||
|
if (!has_output_block_71) {
|
||||||
|
return VERSION_SDXS;
|
||||||
|
}
|
||||||
return VERSION_SD1_TINY_UNET;
|
return VERSION_SD1_TINY_UNET;
|
||||||
}
|
}
|
||||||
return VERSION_SD1;
|
return VERSION_SD1;
|
||||||
@ -1310,34 +1341,14 @@ void ModelLoader::set_wtype_override(ggml_type wtype, std::string tensor_type_ru
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ModelLoader::load_merges() {
|
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
|
||||||
std::string merges_utf8_str(reinterpret_cast<const char*>(merges_utf8_c_str), sizeof(merges_utf8_c_str));
|
|
||||||
return merges_utf8_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ModelLoader::load_qwen2_merges() {
|
|
||||||
std::string merges_utf8_str(reinterpret_cast<const char*>(qwen2_merges_utf8_c_str), sizeof(qwen2_merges_utf8_c_str));
|
|
||||||
return merges_utf8_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ModelLoader::load_t5_tokenizer_json() {
|
|
||||||
std::string json_str(reinterpret_cast<const char*>(t5_tokenizer_json_str), sizeof(t5_tokenizer_json_str));
|
|
||||||
return json_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ModelLoader::load_umt5_tokenizer_json() {
|
|
||||||
std::string json_str(reinterpret_cast<const char*>(umt5_tokenizer_json_str), sizeof(umt5_tokenizer_json_str));
|
|
||||||
return json_str;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p) {
|
|
||||||
int64_t process_time_ms = 0;
|
int64_t process_time_ms = 0;
|
||||||
std::atomic<int64_t> read_time_ms(0);
|
std::atomic<int64_t> read_time_ms(0);
|
||||||
std::atomic<int64_t> memcpy_time_ms(0);
|
std::atomic<int64_t> memcpy_time_ms(0);
|
||||||
std::atomic<int64_t> copy_to_backend_time_ms(0);
|
std::atomic<int64_t> copy_to_backend_time_ms(0);
|
||||||
std::atomic<int64_t> convert_time_ms(0);
|
std::atomic<int64_t> convert_time_ms(0);
|
||||||
|
|
||||||
int num_threads_to_use = n_threads_p > 0 ? n_threads_p : get_num_physical_cores();
|
int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
|
||||||
LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
|
LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
|
||||||
|
|
||||||
int64_t start_time = ggml_time_ms();
|
int64_t start_time = ggml_time_ms();
|
||||||
@ -1380,6 +1391,15 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<MmapWrapper> mmapped;
|
||||||
|
if (enable_mmap && !is_zip) {
|
||||||
|
LOG_DEBUG("using mmap for I/O");
|
||||||
|
mmapped = MmapWrapper::create(file_path);
|
||||||
|
if (!mmapped) {
|
||||||
|
LOG_WARN("failed to memory-map '%s'", file_path.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
|
int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
|
||||||
if (n_threads < 1) {
|
if (n_threads < 1) {
|
||||||
n_threads = 1;
|
n_threads = 1;
|
||||||
@ -1393,7 +1413,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
for (int i = 0; i < n_threads; ++i) {
|
for (int i = 0; i < n_threads; ++i) {
|
||||||
workers.emplace_back([&, file_path, is_zip]() {
|
workers.emplace_back([&, file_path, is_zip]() {
|
||||||
std::ifstream file;
|
std::ifstream file;
|
||||||
struct zip_t* zip = nullptr;
|
zip_t* zip = nullptr;
|
||||||
if (is_zip) {
|
if (is_zip) {
|
||||||
zip = zip_open(file_path.c_str(), 0, 'r');
|
zip = zip_open(file_path.c_str(), 0, 'r');
|
||||||
if (zip == nullptr) {
|
if (zip == nullptr) {
|
||||||
@ -1401,7 +1421,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
failed = true;
|
failed = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else if (!mmapped) {
|
||||||
file.open(file_path, std::ios::binary);
|
file.open(file_path, std::ios::binary);
|
||||||
if (!file.is_open()) {
|
if (!file.is_open()) {
|
||||||
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
LOG_ERROR("failed to open '%s'", file_path.c_str());
|
||||||
@ -1454,6 +1474,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
zip_entry_noallocread(zip, (void*)buf, n);
|
zip_entry_noallocread(zip, (void*)buf, n);
|
||||||
}
|
}
|
||||||
zip_entry_close(zip);
|
zip_entry_close(zip);
|
||||||
|
} else if (mmapped) {
|
||||||
|
if (!mmapped->copy_data(buf, n, tensor_storage.offset)) {
|
||||||
|
LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
|
||||||
|
failed = true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
file.seekg(tensor_storage.offset);
|
file.seekg(tensor_storage.offset);
|
||||||
file.read(buf, n);
|
file.read(buf, n);
|
||||||
@ -1500,9 +1525,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
read_time_ms.fetch_add(t1 - t0);
|
read_time_ms.fetch_add(t1 - t0);
|
||||||
|
|
||||||
t0 = ggml_time_ms();
|
t0 = ggml_time_ms();
|
||||||
if (tensor_storage.is_bf16) {
|
if (tensor_storage.is_f8_e4m3) {
|
||||||
bf16_to_f32_vec((uint16_t*)read_buf, (float*)target_buf, tensor_storage.nelements());
|
|
||||||
} else if (tensor_storage.is_f8_e4m3) {
|
|
||||||
f8_e4m3_to_f16_vec((uint8_t*)read_buf, (uint16_t*)target_buf, tensor_storage.nelements());
|
f8_e4m3_to_f16_vec((uint8_t*)read_buf, (uint16_t*)target_buf, tensor_storage.nelements());
|
||||||
} else if (tensor_storage.is_f8_e5m2) {
|
} else if (tensor_storage.is_f8_e5m2) {
|
||||||
f8_e5m2_to_f16_vec((uint8_t*)read_buf, (uint16_t*)target_buf, tensor_storage.nelements());
|
f8_e5m2_to_f16_vec((uint8_t*)read_buf, (uint16_t*)target_buf, tensor_storage.nelements());
|
||||||
@ -1512,6 +1535,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
i64_to_i32_vec((int64_t*)read_buf, (int32_t*)target_buf, tensor_storage.nelements());
|
i64_to_i32_vec((int64_t*)read_buf, (int32_t*)target_buf, tensor_storage.nelements());
|
||||||
}
|
}
|
||||||
if (tensor_storage.type != dst_tensor->type) {
|
if (tensor_storage.type != dst_tensor->type) {
|
||||||
|
if (convert_buf == nullptr) {
|
||||||
|
LOG_ERROR("read tensor data failed: too less memory for conversion");
|
||||||
|
failed = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
convert_tensor((void*)target_buf,
|
convert_tensor((void*)target_buf,
|
||||||
tensor_storage.type,
|
tensor_storage.type,
|
||||||
convert_buf,
|
convert_buf,
|
||||||
@ -1543,7 +1571,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
size_t curr_num = total_tensors_processed + current_idx;
|
size_t curr_num = total_tensors_processed + current_idx;
|
||||||
pretty_progress(curr_num, total_tensors_to_process, (ggml_time_ms() - t_start) / 1000.0f / (curr_num + 1e-6f));
|
pretty_progress(static_cast<int>(curr_num), static_cast<int>(total_tensors_to_process), (ggml_time_ms() - t_start) / 1000.0f / (curr_num + 1e-6f));
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
std::this_thread::sleep_for(std::chrono::milliseconds(200));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1556,7 +1584,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
total_tensors_processed += file_tensors.size();
|
total_tensors_processed += file_tensors.size();
|
||||||
pretty_progress(total_tensors_processed, total_tensors_to_process, (ggml_time_ms() - t_start) / 1000.0f / (total_tensors_processed + 1e-6f));
|
pretty_progress(static_cast<int>(total_tensors_processed), static_cast<int>(total_tensors_to_process), (ggml_time_ms() - t_start) / 1000.0f / (total_tensors_processed + 1e-6f));
|
||||||
if (total_tensors_processed < total_tensors_to_process) {
|
if (total_tensors_processed < total_tensors_to_process) {
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
@ -1573,9 +1601,10 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
|
|||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
bool ModelLoader::load_tensors(std::map<std::string, ggml_tensor*>& tensors,
|
||||||
std::set<std::string> ignore_tensors,
|
std::set<std::string> ignore_tensors,
|
||||||
int n_threads) {
|
int n_threads,
|
||||||
|
bool enable_mmap) {
|
||||||
std::set<std::string> tensor_names_in_file;
|
std::set<std::string> tensor_names_in_file;
|
||||||
std::mutex tensor_names_mutex;
|
std::mutex tensor_names_mutex;
|
||||||
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
|
||||||
@ -1586,7 +1615,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
|
|||||||
tensor_names_in_file.insert(name);
|
tensor_names_in_file.insert(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* real;
|
ggml_tensor* real;
|
||||||
if (tensors.find(name) != tensors.end()) {
|
if (tensors.find(name) != tensors.end()) {
|
||||||
real = tensors[name];
|
real = tensors[name];
|
||||||
} else {
|
} else {
|
||||||
@ -1618,7 +1647,7 @@ bool ModelLoader::load_tensors(std::map<std::string, struct ggml_tensor*>& tenso
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool success = load_tensors(on_new_tensor_cb, n_threads);
|
bool success = load_tensors(on_new_tensor_cb, n_threads, enable_mmap);
|
||||||
if (!success) {
|
if (!success) {
|
||||||
LOG_ERROR("load tensors from file failed");
|
LOG_ERROR("load tensors from file failed");
|
||||||
return false;
|
return false;
|
||||||
@ -1724,6 +1753,13 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
|
|||||||
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
|
// tensor_storage.ne[0], tensor_storage.ne[1], tensor_storage.ne[2], tensor_storage.ne[3],
|
||||||
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
|
// tensor->n_dims, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
|
||||||
|
|
||||||
|
if (!tensor->data) {
|
||||||
|
GGML_ASSERT(ggml_nelements(tensor) == 0);
|
||||||
|
// avoid crashing the gguf writer by setting a dummy pointer for zero-sized tensors
|
||||||
|
LOG_DEBUG("setting dummy pointer for zero-sized tensor %s", name.c_str());
|
||||||
|
tensor->data = ggml_get_mem_buffer(ggml_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
*dst_tensor = tensor;
|
*dst_tensor = tensor;
|
||||||
|
|
||||||
gguf_add_tensor(gguf_ctx, tensor);
|
gguf_add_tensor(gguf_ctx, tensor);
|
||||||
@ -1763,7 +1799,12 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type)
|
|||||||
return mem_size;
|
return mem_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type, const char* tensor_type_rules) {
|
bool convert(const char* input_path,
|
||||||
|
const char* vae_path,
|
||||||
|
const char* output_path,
|
||||||
|
sd_type_t output_type,
|
||||||
|
const char* tensor_type_rules,
|
||||||
|
bool convert_name) {
|
||||||
ModelLoader model_loader;
|
ModelLoader model_loader;
|
||||||
|
|
||||||
if (!model_loader.init_from_file(input_path)) {
|
if (!model_loader.init_from_file(input_path)) {
|
||||||
@ -1777,7 +1818,9 @@ bool convert(const char* input_path, const char* vae_path, const char* output_pa
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (convert_name) {
|
||||||
model_loader.convert_tensors_name();
|
model_loader.convert_tensors_name();
|
||||||
|
}
|
||||||
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, tensor_type_rules);
|
bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, tensor_type_rules);
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
@ -28,9 +28,11 @@ enum SDVersion {
|
|||||||
VERSION_SD2,
|
VERSION_SD2,
|
||||||
VERSION_SD2_INPAINT,
|
VERSION_SD2_INPAINT,
|
||||||
VERSION_SD2_TINY_UNET,
|
VERSION_SD2_TINY_UNET,
|
||||||
|
VERSION_SDXS,
|
||||||
VERSION_SDXL,
|
VERSION_SDXL,
|
||||||
VERSION_SDXL_INPAINT,
|
VERSION_SDXL_INPAINT,
|
||||||
VERSION_SDXL_PIX2PIX,
|
VERSION_SDXL_PIX2PIX,
|
||||||
|
VERSION_SDXL_VEGA,
|
||||||
VERSION_SDXL_SSD1B,
|
VERSION_SDXL_SSD1B,
|
||||||
VERSION_SVD,
|
VERSION_SVD,
|
||||||
VERSION_SD3,
|
VERSION_SD3,
|
||||||
@ -43,11 +45,16 @@ enum SDVersion {
|
|||||||
VERSION_WAN2_2_I2V,
|
VERSION_WAN2_2_I2V,
|
||||||
VERSION_WAN2_2_TI2V,
|
VERSION_WAN2_2_TI2V,
|
||||||
VERSION_QWEN_IMAGE,
|
VERSION_QWEN_IMAGE,
|
||||||
|
VERSION_ANIMA,
|
||||||
|
VERSION_FLUX2,
|
||||||
|
VERSION_FLUX2_KLEIN,
|
||||||
|
VERSION_Z_IMAGE,
|
||||||
|
VERSION_OVIS_IMAGE,
|
||||||
VERSION_COUNT,
|
VERSION_COUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool sd_version_is_sd1(SDVersion version) {
|
static inline bool sd_version_is_sd1(SDVersion version) {
|
||||||
if (version == VERSION_SD1 || version == VERSION_SD1_INPAINT || version == VERSION_SD1_PIX2PIX || version == VERSION_SD1_TINY_UNET) {
|
if (version == VERSION_SD1 || version == VERSION_SD1_INPAINT || version == VERSION_SD1_PIX2PIX || version == VERSION_SD1_TINY_UNET || version == VERSION_SDXS) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -61,7 +68,7 @@ static inline bool sd_version_is_sd2(SDVersion version) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline bool sd_version_is_sdxl(SDVersion version) {
|
static inline bool sd_version_is_sdxl(SDVersion version) {
|
||||||
if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX || version == VERSION_SDXL_SSD1B) {
|
if (version == VERSION_SDXL || version == VERSION_SDXL_INPAINT || version == VERSION_SDXL_PIX2PIX || version == VERSION_SDXL_SSD1B || version == VERSION_SDXL_VEGA) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -88,12 +95,20 @@ static inline bool sd_version_is_flux(SDVersion version) {
|
|||||||
version == VERSION_FLUX_FILL ||
|
version == VERSION_FLUX_FILL ||
|
||||||
version == VERSION_FLUX_CONTROLS ||
|
version == VERSION_FLUX_CONTROLS ||
|
||||||
version == VERSION_FLEX_2 ||
|
version == VERSION_FLEX_2 ||
|
||||||
|
version == VERSION_OVIS_IMAGE ||
|
||||||
version == VERSION_CHROMA_RADIANCE) {
|
version == VERSION_CHROMA_RADIANCE) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool sd_version_is_flux2(SDVersion version) {
|
||||||
|
if (version == VERSION_FLUX2 || version == VERSION_FLUX2_KLEIN) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool sd_version_is_wan(SDVersion version) {
|
static inline bool sd_version_is_wan(SDVersion version) {
|
||||||
if (version == VERSION_WAN2 || version == VERSION_WAN2_2_I2V || version == VERSION_WAN2_2_TI2V) {
|
if (version == VERSION_WAN2 || version == VERSION_WAN2_2_I2V || version == VERSION_WAN2_2_TI2V) {
|
||||||
return true;
|
return true;
|
||||||
@ -108,6 +123,20 @@ static inline bool sd_version_is_qwen_image(SDVersion version) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool sd_version_is_anima(SDVersion version) {
|
||||||
|
if (version == VERSION_ANIMA) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool sd_version_is_z_image(SDVersion version) {
|
||||||
|
if (version == VERSION_Z_IMAGE) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool sd_version_is_inpaint(SDVersion version) {
|
static inline bool sd_version_is_inpaint(SDVersion version) {
|
||||||
if (version == VERSION_SD1_INPAINT ||
|
if (version == VERSION_SD1_INPAINT ||
|
||||||
version == VERSION_SD2_INPAINT ||
|
version == VERSION_SD2_INPAINT ||
|
||||||
@ -121,9 +150,12 @@ static inline bool sd_version_is_inpaint(SDVersion version) {
|
|||||||
|
|
||||||
static inline bool sd_version_is_dit(SDVersion version) {
|
static inline bool sd_version_is_dit(SDVersion version) {
|
||||||
if (sd_version_is_flux(version) ||
|
if (sd_version_is_flux(version) ||
|
||||||
|
sd_version_is_flux2(version) ||
|
||||||
sd_version_is_sd3(version) ||
|
sd_version_is_sd3(version) ||
|
||||||
sd_version_is_wan(version) ||
|
sd_version_is_wan(version) ||
|
||||||
sd_version_is_qwen_image(version)) {
|
sd_version_is_qwen_image(version) ||
|
||||||
|
sd_version_is_anima(version) ||
|
||||||
|
sd_version_is_z_image(version)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -150,7 +182,6 @@ struct TensorStorage {
|
|||||||
std::string name;
|
std::string name;
|
||||||
ggml_type type = GGML_TYPE_F32;
|
ggml_type type = GGML_TYPE_F32;
|
||||||
ggml_type expected_type = GGML_TYPE_COUNT;
|
ggml_type expected_type = GGML_TYPE_COUNT;
|
||||||
bool is_bf16 = false;
|
|
||||||
bool is_f8_e4m3 = false;
|
bool is_f8_e4m3 = false;
|
||||||
bool is_f8_e5m2 = false;
|
bool is_f8_e5m2 = false;
|
||||||
bool is_f64 = false;
|
bool is_f64 = false;
|
||||||
@ -184,7 +215,7 @@ struct TensorStorage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int64_t nbytes_to_read() const {
|
int64_t nbytes_to_read() const {
|
||||||
if (is_bf16 || is_f8_e4m3 || is_f8_e5m2) {
|
if (is_f8_e4m3 || is_f8_e5m2) {
|
||||||
return nbytes() / 2;
|
return nbytes() / 2;
|
||||||
} else if (is_f64 || is_i64) {
|
} else if (is_f64 || is_i64) {
|
||||||
return nbytes() * 2;
|
return nbytes() * 2;
|
||||||
@ -232,9 +263,7 @@ struct TensorStorage {
|
|||||||
std::string to_string() const {
|
std::string to_string() const {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
const char* type_name = ggml_type_name(type);
|
const char* type_name = ggml_type_name(type);
|
||||||
if (is_bf16) {
|
if (is_f8_e4m3) {
|
||||||
type_name = "bf16";
|
|
||||||
} else if (is_f8_e4m3) {
|
|
||||||
type_name = "f8_e4m3";
|
type_name = "f8_e4m3";
|
||||||
} else if (is_f8_e5m2) {
|
} else if (is_f8_e5m2) {
|
||||||
type_name = "f8_e5m2";
|
type_name = "f8_e5m2";
|
||||||
@ -293,10 +322,11 @@ public:
|
|||||||
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
|
std::map<ggml_type, uint32_t> get_vae_wtype_stat();
|
||||||
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
|
String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
|
||||||
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
|
void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
|
||||||
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0);
|
bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
|
||||||
bool load_tensors(std::map<std::string, struct ggml_tensor*>& tensors,
|
bool load_tensors(std::map<std::string, ggml_tensor*>& tensors,
|
||||||
std::set<std::string> ignore_tensors = {},
|
std::set<std::string> ignore_tensors = {},
|
||||||
int n_threads = 0);
|
int n_threads = 0,
|
||||||
|
bool use_mmap = false);
|
||||||
|
|
||||||
std::vector<std::string> get_tensor_names() const {
|
std::vector<std::string> get_tensor_names() const {
|
||||||
std::vector<std::string> names;
|
std::vector<std::string> names;
|
||||||
@ -310,11 +340,6 @@ public:
|
|||||||
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
|
bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
|
||||||
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
|
int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
|
||||||
~ModelLoader() = default;
|
~ModelLoader() = default;
|
||||||
|
|
||||||
static std::string load_merges();
|
|
||||||
static std::string load_qwen2_merges();
|
|
||||||
static std::string load_t5_tokenizer_json();
|
|
||||||
static std::string load_umt5_tokenizer_json();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __MODEL_H__
|
#endif // __MODEL_H__
|
||||||
@ -127,12 +127,14 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
|
|||||||
{"token_embd.", "shared."},
|
{"token_embd.", "shared."},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const std::vector<std::pair<std::string, std::string>> qwenvl_name_map{
|
static const std::vector<std::pair<std::string, std::string>> llm_name_map{
|
||||||
{"token_embd.", "model.embed_tokens."},
|
{"token_embd.", "model.embed_tokens."},
|
||||||
{"blk.", "model.layers."},
|
{"blk.", "model.layers."},
|
||||||
{"attn_q.", "self_attn.q_proj."},
|
{"attn_q.", "self_attn.q_proj."},
|
||||||
{"attn_k.", "self_attn.k_proj."},
|
{"attn_k.", "self_attn.k_proj."},
|
||||||
{"attn_v.", "self_attn.v_proj."},
|
{"attn_v.", "self_attn.v_proj."},
|
||||||
|
{"attn_q_norm.", "self_attn.q_norm."},
|
||||||
|
{"attn_k_norm.", "self_attn.k_norm."},
|
||||||
{"attn_output.", "self_attn.o_proj."},
|
{"attn_output.", "self_attn.o_proj."},
|
||||||
{"attn_norm.", "input_layernorm."},
|
{"attn_norm.", "input_layernorm."},
|
||||||
{"ffn_down.", "mlp.down_proj."},
|
{"ffn_down.", "mlp.down_proj."},
|
||||||
@ -142,7 +144,7 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
|
|||||||
{"output_norm.", "model.norm."},
|
{"output_norm.", "model.norm."},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const std::vector<std::pair<std::string, std::string>> qwenvl_vision_name_map{
|
static const std::vector<std::pair<std::string, std::string>> llm_vision_name_map{
|
||||||
{"mm.", "merger.mlp."},
|
{"mm.", "merger.mlp."},
|
||||||
{"v.post_ln.", "merger.ln_q."},
|
{"v.post_ln.", "merger.ln_q."},
|
||||||
{"v.patch_embd.weight", "patch_embed.proj.0.weight"},
|
{"v.patch_embd.weight", "patch_embed.proj.0.weight"},
|
||||||
@ -161,11 +163,11 @@ std::string convert_cond_stage_model_name(std::string name, std::string prefix)
|
|||||||
};
|
};
|
||||||
if (contains(name, "t5xxl")) {
|
if (contains(name, "t5xxl")) {
|
||||||
replace_with_name_map(name, t5_name_map);
|
replace_with_name_map(name, t5_name_map);
|
||||||
} else if (contains(name, "qwen2vl")) {
|
} else if (contains(name, "llm")) {
|
||||||
if (contains(name, "qwen2vl.visual")) {
|
if (contains(name, "llm.visual")) {
|
||||||
replace_with_name_map(name, qwenvl_vision_name_map);
|
replace_with_name_map(name, llm_vision_name_map);
|
||||||
} else {
|
} else {
|
||||||
replace_with_name_map(name, qwenvl_name_map);
|
replace_with_name_map(name, llm_name_map);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
name = convert_open_clip_to_hf_clip_name(name);
|
name = convert_open_clip_to_hf_clip_name(name);
|
||||||
@ -613,6 +615,52 @@ std::string convert_diffusers_dit_to_original_flux(std::string name) {
|
|||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string convert_diffusers_dit_to_original_lumina2(std::string name) {
|
||||||
|
int num_layers = 30;
|
||||||
|
int num_refiner_layers = 2;
|
||||||
|
static std::unordered_map<std::string, std::string> z_image_name_map;
|
||||||
|
|
||||||
|
if (z_image_name_map.empty()) {
|
||||||
|
z_image_name_map["all_x_embedder.2-1."] = "x_embedder.";
|
||||||
|
z_image_name_map["all_final_layer.2-1."] = "final_layer.";
|
||||||
|
|
||||||
|
// --- transformer blocks ---
|
||||||
|
auto add_attention_map = [&](const std::string& prefix, int num) {
|
||||||
|
for (int i = 0; i < num; ++i) {
|
||||||
|
std::string block_prefix = prefix + std::to_string(i) + ".";
|
||||||
|
std::string dst_prefix = prefix + std::to_string(i) + ".";
|
||||||
|
|
||||||
|
z_image_name_map[block_prefix + "attention.norm_q."] = dst_prefix + "attention.q_norm.";
|
||||||
|
z_image_name_map[block_prefix + "attention.norm_k."] = dst_prefix + "attention.k_norm.";
|
||||||
|
z_image_name_map[block_prefix + "attention.to_out.0."] = dst_prefix + "attention.out.";
|
||||||
|
|
||||||
|
z_image_name_map[block_prefix + "attention.to_q.weight"] = dst_prefix + "attention.qkv.weight";
|
||||||
|
z_image_name_map[block_prefix + "attention.to_q.bias"] = dst_prefix + "attention.qkv.bias";
|
||||||
|
z_image_name_map[block_prefix + "attention.to_k.weight"] = dst_prefix + "attention.qkv.weight.1";
|
||||||
|
z_image_name_map[block_prefix + "attention.to_k.bias"] = dst_prefix + "attention.qkv.bias.1";
|
||||||
|
z_image_name_map[block_prefix + "attention.to_v.weight"] = dst_prefix + "attention.qkv.weight.2";
|
||||||
|
z_image_name_map[block_prefix + "attention.to_v.bias"] = dst_prefix + "attention.qkv.bias.2";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
add_attention_map("noise_refiner.", num_refiner_layers);
|
||||||
|
add_attention_map("context_refiner.", num_refiner_layers);
|
||||||
|
add_attention_map("layers.", num_layers);
|
||||||
|
}
|
||||||
|
|
||||||
|
replace_with_prefix_map(name, z_image_name_map);
|
||||||
|
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string convert_other_dit_to_original_anima(std::string name) {
|
||||||
|
static const std::string anima_net_prefix = "net.";
|
||||||
|
if (!starts_with(name, anima_net_prefix)) {
|
||||||
|
name = anima_net_prefix + name;
|
||||||
|
}
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
std::string convert_diffusion_model_name(std::string name, std::string prefix, SDVersion version) {
|
std::string convert_diffusion_model_name(std::string name, std::string prefix, SDVersion version) {
|
||||||
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
|
if (sd_version_is_sd1(version) || sd_version_is_sd2(version)) {
|
||||||
name = convert_diffusers_unet_to_original_sd1(name);
|
name = convert_diffusers_unet_to_original_sd1(name);
|
||||||
@ -620,8 +668,12 @@ std::string convert_diffusion_model_name(std::string name, std::string prefix, S
|
|||||||
name = convert_diffusers_unet_to_original_sdxl(name);
|
name = convert_diffusers_unet_to_original_sdxl(name);
|
||||||
} else if (sd_version_is_sd3(version)) {
|
} else if (sd_version_is_sd3(version)) {
|
||||||
name = convert_diffusers_dit_to_original_sd3(name);
|
name = convert_diffusers_dit_to_original_sd3(name);
|
||||||
} else if (sd_version_is_flux(version)) {
|
} else if (sd_version_is_flux(version) || sd_version_is_flux2(version)) {
|
||||||
name = convert_diffusers_dit_to_original_flux(name);
|
name = convert_diffusers_dit_to_original_flux(name);
|
||||||
|
} else if (sd_version_is_z_image(version)) {
|
||||||
|
name = convert_diffusers_dit_to_original_lumina2(name);
|
||||||
|
} else if (sd_version_is_anima(version)) {
|
||||||
|
name = convert_other_dit_to_original_anima(name);
|
||||||
}
|
}
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
@ -722,6 +774,11 @@ std::string convert_diffusers_vae_to_original_sd1(std::string name) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string convert_first_stage_model_name(std::string name, std::string prefix) {
|
std::string convert_first_stage_model_name(std::string name, std::string prefix) {
|
||||||
|
static std::unordered_map<std::string, std::string> vae_name_map = {
|
||||||
|
{"decoder.post_quant_conv.", "post_quant_conv."},
|
||||||
|
{"encoder.quant_conv.", "quant_conv."},
|
||||||
|
};
|
||||||
|
replace_with_prefix_map(name, vae_name_map);
|
||||||
name = convert_diffusers_vae_to_original_sd1(name);
|
name = convert_diffusers_vae_to_original_sd1(name);
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
@ -788,12 +845,14 @@ std::string convert_sep_to_dot(std::string name) {
|
|||||||
"proj_out",
|
"proj_out",
|
||||||
"transformer_blocks",
|
"transformer_blocks",
|
||||||
"single_transformer_blocks",
|
"single_transformer_blocks",
|
||||||
|
"single_blocks",
|
||||||
"diffusion_model",
|
"diffusion_model",
|
||||||
"cond_stage_model",
|
"cond_stage_model",
|
||||||
"first_stage_model",
|
"first_stage_model",
|
||||||
"conv_in",
|
"conv_in",
|
||||||
"conv_out",
|
"conv_out",
|
||||||
"lora_down",
|
"lora_down",
|
||||||
|
"lora_mid",
|
||||||
"lora_up",
|
"lora_up",
|
||||||
"diff_b",
|
"diff_b",
|
||||||
"hada_w1_a",
|
"hada_w1_a",
|
||||||
@ -829,7 +888,18 @@ std::string convert_sep_to_dot(std::string name) {
|
|||||||
"ff_context",
|
"ff_context",
|
||||||
"norm_added_q",
|
"norm_added_q",
|
||||||
"norm_added_v",
|
"norm_added_v",
|
||||||
"to_add_out"};
|
"to_add_out",
|
||||||
|
"txt_mod",
|
||||||
|
"img_mod",
|
||||||
|
"txt_mlp",
|
||||||
|
"img_mlp",
|
||||||
|
"proj_mlp",
|
||||||
|
"wi_0",
|
||||||
|
"wi_1",
|
||||||
|
"norm1_context",
|
||||||
|
"ff_context",
|
||||||
|
"x_embedder",
|
||||||
|
};
|
||||||
|
|
||||||
// record the positions of underscores that should NOT be replaced
|
// record the positions of underscores that should NOT be replaced
|
||||||
std::unordered_set<size_t> protected_positions;
|
std::unordered_set<size_t> protected_positions;
|
||||||
@ -901,6 +971,7 @@ bool is_first_stage_model_name(const std::string& name) {
|
|||||||
std::string convert_tensor_name(std::string name, SDVersion version) {
|
std::string convert_tensor_name(std::string name, SDVersion version) {
|
||||||
bool is_lora = false;
|
bool is_lora = false;
|
||||||
bool is_lycoris_underline = false;
|
bool is_lycoris_underline = false;
|
||||||
|
bool is_underline = false;
|
||||||
std::vector<std::string> lora_prefix_vec = {
|
std::vector<std::string> lora_prefix_vec = {
|
||||||
"lora.lora.",
|
"lora.lora.",
|
||||||
"lora.lora_",
|
"lora.lora_",
|
||||||
@ -908,12 +979,27 @@ std::string convert_tensor_name(std::string name, SDVersion version) {
|
|||||||
"lora.lycoris.",
|
"lora.lycoris.",
|
||||||
"lora.",
|
"lora.",
|
||||||
};
|
};
|
||||||
|
std::vector<std::string> underline_lora_prefix_vec = {
|
||||||
|
"unet_",
|
||||||
|
"te_",
|
||||||
|
"te1_",
|
||||||
|
"te2_",
|
||||||
|
"te3_",
|
||||||
|
"vae_",
|
||||||
|
};
|
||||||
for (const auto& prefix : lora_prefix_vec) {
|
for (const auto& prefix : lora_prefix_vec) {
|
||||||
if (starts_with(name, prefix)) {
|
if (starts_with(name, prefix)) {
|
||||||
is_lora = true;
|
is_lora = true;
|
||||||
name = name.substr(prefix.size());
|
name = name.substr(prefix.size());
|
||||||
if (contains(prefix, "lycoris_")) {
|
if (contains(prefix, "lycoris_")) {
|
||||||
is_lycoris_underline = true;
|
is_lycoris_underline = true;
|
||||||
|
} else {
|
||||||
|
for (const auto& underline_lora_prefix : underline_lora_prefix_vec) {
|
||||||
|
if (starts_with(name, underline_lora_prefix)) {
|
||||||
|
is_underline = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -922,10 +1008,13 @@ std::string convert_tensor_name(std::string name, SDVersion version) {
|
|||||||
if (is_lora) {
|
if (is_lora) {
|
||||||
std::map<std::string, std::string> lora_suffix_map = {
|
std::map<std::string, std::string> lora_suffix_map = {
|
||||||
{".lora_down.weight", ".weight.lora_down"},
|
{".lora_down.weight", ".weight.lora_down"},
|
||||||
|
{".lora_mid.weight", ".weight.lora_mid"},
|
||||||
{".lora_up.weight", ".weight.lora_up"},
|
{".lora_up.weight", ".weight.lora_up"},
|
||||||
{".lora.down.weight", ".weight.lora_down"},
|
{".lora.down.weight", ".weight.lora_down"},
|
||||||
|
{".lora.mid.weight", ".weight.lora_mid"},
|
||||||
{".lora.up.weight", ".weight.lora_up"},
|
{".lora.up.weight", ".weight.lora_up"},
|
||||||
{"_lora.down.weight", ".weight.lora_down"},
|
{"_lora.down.weight", ".weight.lora_down"},
|
||||||
|
{"_lora.mid.weight", ".weight.lora_mid"},
|
||||||
{"_lora.up.weight", ".weight.lora_up"},
|
{"_lora.up.weight", ".weight.lora_up"},
|
||||||
{".lora_A.weight", ".weight.lora_down"},
|
{".lora_A.weight", ".weight.lora_down"},
|
||||||
{".lora_B.weight", ".weight.lora_up"},
|
{".lora_B.weight", ".weight.lora_up"},
|
||||||
@ -973,12 +1062,14 @@ std::string convert_tensor_name(std::string name, SDVersion version) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sd_version_is_unet(version) || is_lycoris_underline) {
|
// LOG_DEBUG("name %s %d", name.c_str(), version);
|
||||||
|
|
||||||
|
if (sd_version_is_unet(version) || is_underline || is_lycoris_underline) {
|
||||||
name = convert_sep_to_dot(name);
|
name = convert_sep_to_dot(name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<std::string, std::string>> prefix_map = {
|
std::unordered_map<std::string, std::string> prefix_map = {
|
||||||
{"diffusion_model.", "model.diffusion_model."},
|
{"diffusion_model.", "model.diffusion_model."},
|
||||||
{"unet.", "model.diffusion_model."},
|
{"unet.", "model.diffusion_model."},
|
||||||
{"transformer.", "model.diffusion_model."}, // dit
|
{"transformer.", "model.diffusion_model."}, // dit
|
||||||
@ -993,8 +1084,13 @@ std::string convert_tensor_name(std::string name, SDVersion version) {
|
|||||||
// {"te2.text_model.encoder.layers.", "cond_stage_model.1.model.transformer.resblocks."},
|
// {"te2.text_model.encoder.layers.", "cond_stage_model.1.model.transformer.resblocks."},
|
||||||
{"te2.", "cond_stage_model.1.transformer."},
|
{"te2.", "cond_stage_model.1.transformer."},
|
||||||
{"te1.", "cond_stage_model.transformer."},
|
{"te1.", "cond_stage_model.transformer."},
|
||||||
|
{"te3.", "text_encoders.t5xxl.transformer."},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (sd_version_is_flux(version)) {
|
||||||
|
prefix_map["te1."] = "text_encoders.clip_l.transformer.";
|
||||||
|
}
|
||||||
|
|
||||||
replace_with_prefix_map(name, prefix_map);
|
replace_with_prefix_map(name, prefix_map);
|
||||||
|
|
||||||
// diffusion model
|
// diffusion model
|
||||||
@ -1024,7 +1120,11 @@ std::string convert_tensor_name(std::string name, SDVersion version) {
|
|||||||
for (const auto& prefix : first_stage_model_prefix_vec) {
|
for (const auto& prefix : first_stage_model_prefix_vec) {
|
||||||
if (starts_with(name, prefix)) {
|
if (starts_with(name, prefix)) {
|
||||||
name = convert_first_stage_model_name(name.substr(prefix.size()), prefix);
|
name = convert_first_stage_model_name(name.substr(prefix.size()), prefix);
|
||||||
|
if (version == VERSION_SDXS) {
|
||||||
|
name = "tae." + name;
|
||||||
|
} else {
|
||||||
name = prefix + name;
|
name = prefix + name;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -21,19 +21,19 @@ public:
|
|||||||
blocks["layernorm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(in_dim));
|
blocks["layernorm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(in_dim));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
|
ggml_tensor* forward(GGMLRunnerContext* ctx, ggml_tensor* x) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
|
|
||||||
auto fc1 = std::dynamic_pointer_cast<Linear>(blocks["fc1"]);
|
auto fc1 = std::dynamic_pointer_cast<Linear>(blocks["fc1"]);
|
||||||
auto fc2 = std::dynamic_pointer_cast<Linear>(blocks["fc2"]);
|
auto fc2 = std::dynamic_pointer_cast<Linear>(blocks["fc2"]);
|
||||||
auto layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["layernorm"]);
|
auto layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["layernorm"]);
|
||||||
|
|
||||||
struct ggml_tensor* r = x;
|
ggml_tensor* r = x;
|
||||||
// x = ggml_ext_layer_norm(ctx, x, ln_w, ln_b);
|
// x = ggml_ext_layer_norm(ctx, x, ln_w, ln_b);
|
||||||
x = layer_norm->forward(ctx, x);
|
x = layer_norm->forward(ctx, x);
|
||||||
// x = ggml_add(ctx, ggml_mul_mat(ctx, fc1_w, x), fc1_b);
|
// x = ggml_add(ctx, ggml_mul_mat(ctx, fc1_w, x), fc1_b);
|
||||||
x = fc1->forward(ctx, x);
|
x = fc1->forward(ctx, x);
|
||||||
x = ggml_gelu_inplace(ctx->ggml_ctx, x);
|
x = ggml_ext_gelu(ctx->ggml_ctx, x, true);
|
||||||
x = fc2->forward(ctx, x);
|
x = fc2->forward(ctx, x);
|
||||||
// x = ggml_add(ctx, ggml_mul_mat(ctx, fc2_w, x), fc2_b);
|
// x = ggml_add(ctx, ggml_mul_mat(ctx, fc2_w, x), fc2_b);
|
||||||
if (use_residue)
|
if (use_residue)
|
||||||
@ -54,8 +54,8 @@ public:
|
|||||||
blocks["1"] = std::shared_ptr<GGMLBlock>(new Mlp(dim, inner_dim, dim, false));
|
blocks["1"] = std::shared_ptr<GGMLBlock>(new Mlp(dim, inner_dim, dim, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x) {
|
ggml_tensor* x) {
|
||||||
auto norm = std::dynamic_pointer_cast<LayerNorm>(blocks["0"]);
|
auto norm = std::dynamic_pointer_cast<LayerNorm>(blocks["0"]);
|
||||||
auto ff = std::dynamic_pointer_cast<Mlp>(blocks["1"]);
|
auto ff = std::dynamic_pointer_cast<Mlp>(blocks["1"]);
|
||||||
|
|
||||||
@ -72,7 +72,7 @@ struct PerceiverAttention : public GGMLBlock {
|
|||||||
int heads; // = heads
|
int heads; // = heads
|
||||||
public:
|
public:
|
||||||
PerceiverAttention(int dim, int dim_h = 64, int h = 8)
|
PerceiverAttention(int dim, int dim_h = 64, int h = 8)
|
||||||
: scale(powf(dim_h, -0.5)), dim_head(dim_h), heads(h) {
|
: scale(powf(static_cast<float>(dim_h), -0.5f)), dim_head(dim_h), heads(h) {
|
||||||
int inner_dim = dim_head * heads;
|
int inner_dim = dim_head * heads;
|
||||||
blocks["norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim));
|
blocks["norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim));
|
||||||
blocks["norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim));
|
blocks["norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim));
|
||||||
@ -81,8 +81,8 @@ public:
|
|||||||
blocks["to_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim, false));
|
blocks["to_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* reshape_tensor(struct ggml_context* ctx,
|
ggml_tensor* reshape_tensor(ggml_context* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
int heads) {
|
int heads) {
|
||||||
int64_t ne[4];
|
int64_t ne[4];
|
||||||
for (int i = 0; i < 4; ++i)
|
for (int i = 0; i < 4; ++i)
|
||||||
@ -92,17 +92,17 @@ public:
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<struct ggml_tensor*> chunk_half(struct ggml_context* ctx,
|
std::vector<ggml_tensor*> chunk_half(ggml_context* ctx,
|
||||||
struct ggml_tensor* x) {
|
ggml_tensor* x) {
|
||||||
auto tlo = ggml_view_4d(ctx, x, x->ne[0] / 2, x->ne[1], x->ne[2], x->ne[3], x->nb[1], x->nb[2], x->nb[3], 0);
|
auto tlo = ggml_view_4d(ctx, x, x->ne[0] / 2, x->ne[1], x->ne[2], x->ne[3], x->nb[1], x->nb[2], x->nb[3], 0);
|
||||||
auto tli = ggml_view_4d(ctx, x, x->ne[0] / 2, x->ne[1], x->ne[2], x->ne[3], x->nb[1], x->nb[2], x->nb[3], x->nb[0] * x->ne[0] / 2);
|
auto tli = ggml_view_4d(ctx, x, x->ne[0] / 2, x->ne[1], x->ne[2], x->ne[3], x->nb[1], x->nb[2], x->nb[3], x->nb[0] * x->ne[0] / 2);
|
||||||
return {ggml_cont(ctx, tlo),
|
return {ggml_cont(ctx, tlo),
|
||||||
ggml_cont(ctx, tli)};
|
ggml_cont(ctx, tli)};
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* latents) {
|
ggml_tensor* latents) {
|
||||||
// x (torch.Tensor): image features
|
// x (torch.Tensor): image features
|
||||||
// shape (b, n1, D)
|
// shape (b, n1, D)
|
||||||
// latent (torch.Tensor): latent features
|
// latent (torch.Tensor): latent features
|
||||||
@ -129,8 +129,8 @@ public:
|
|||||||
k = reshape_tensor(ctx->ggml_ctx, k, heads);
|
k = reshape_tensor(ctx->ggml_ctx, k, heads);
|
||||||
v = reshape_tensor(ctx->ggml_ctx, v, heads);
|
v = reshape_tensor(ctx->ggml_ctx, v, heads);
|
||||||
scale = 1.f / sqrt(sqrt((float)dim_head));
|
scale = 1.f / sqrt(sqrt((float)dim_head));
|
||||||
k = ggml_scale_inplace(ctx->ggml_ctx, k, scale);
|
k = ggml_ext_scale(ctx->ggml_ctx, k, scale, true);
|
||||||
q = ggml_scale_inplace(ctx->ggml_ctx, q, scale);
|
q = ggml_ext_scale(ctx->ggml_ctx, q, scale, true);
|
||||||
// auto weight = ggml_mul_mat(ctx, q, k);
|
// auto weight = ggml_mul_mat(ctx, q, k);
|
||||||
auto weight = ggml_mul_mat(ctx->ggml_ctx, k, q); // NOTE order of mul is opposite to pytorch
|
auto weight = ggml_mul_mat(ctx->ggml_ctx, k, q); // NOTE order of mul is opposite to pytorch
|
||||||
|
|
||||||
@ -176,9 +176,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* latents,
|
ggml_tensor* latents,
|
||||||
struct ggml_tensor* x) {
|
ggml_tensor* x) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
auto proj_in = std::dynamic_pointer_cast<Linear>(blocks["proj_in"]);
|
auto proj_in = std::dynamic_pointer_cast<Linear>(blocks["proj_in"]);
|
||||||
auto proj_out = std::dynamic_pointer_cast<Linear>(blocks["proj_out"]);
|
auto proj_out = std::dynamic_pointer_cast<Linear>(blocks["proj_out"]);
|
||||||
@ -225,9 +225,9 @@ public:
|
|||||||
4));
|
4));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* last_hidden_state) {
|
ggml_tensor* last_hidden_state) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
auto token_proj = std::dynamic_pointer_cast<Mlp>(blocks["token_proj"]);
|
auto token_proj = std::dynamic_pointer_cast<Mlp>(blocks["token_proj"]);
|
||||||
auto token_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["token_norm"]);
|
auto token_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["token_norm"]);
|
||||||
@ -237,7 +237,7 @@ public:
|
|||||||
int64_t nel = ggml_nelements(x);
|
int64_t nel = ggml_nelements(x);
|
||||||
x = ggml_reshape_3d(ctx->ggml_ctx, x, cross_attention_dim, num_tokens, nel / (cross_attention_dim * num_tokens));
|
x = ggml_reshape_3d(ctx->ggml_ctx, x, cross_attention_dim, num_tokens, nel / (cross_attention_dim * num_tokens));
|
||||||
x = token_norm->forward(ctx, x);
|
x = token_norm->forward(ctx, x);
|
||||||
struct ggml_tensor* out = perceiver_resampler->forward(ctx, x, last_hidden_state);
|
ggml_tensor* out = perceiver_resampler->forward(ctx, x, last_hidden_state);
|
||||||
if (use_residul)
|
if (use_residul)
|
||||||
out = ggml_add(ctx->ggml_ctx, x, out);
|
out = ggml_add(ctx->ggml_ctx, x, out);
|
||||||
return out;
|
return out;
|
||||||
@ -256,9 +256,9 @@ public:
|
|||||||
blocks["layer_norm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(embed_dim));
|
blocks["layer_norm"] = std::shared_ptr<GGMLBlock>(new LayerNorm(embed_dim));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* fuse_fn(GGMLRunnerContext* ctx,
|
ggml_tensor* fuse_fn(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* prompt_embeds,
|
ggml_tensor* prompt_embeds,
|
||||||
struct ggml_tensor* id_embeds) {
|
ggml_tensor* id_embeds) {
|
||||||
auto mlp1 = std::dynamic_pointer_cast<FuseBlock>(blocks["mlp1"]);
|
auto mlp1 = std::dynamic_pointer_cast<FuseBlock>(blocks["mlp1"]);
|
||||||
auto mlp2 = std::dynamic_pointer_cast<FuseBlock>(blocks["mlp2"]);
|
auto mlp2 = std::dynamic_pointer_cast<FuseBlock>(blocks["mlp2"]);
|
||||||
auto layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm"]);
|
auto layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["layer_norm"]);
|
||||||
@ -273,24 +273,24 @@ public:
|
|||||||
return stacked_id_embeds;
|
return stacked_id_embeds;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* prompt_embeds,
|
ggml_tensor* prompt_embeds,
|
||||||
struct ggml_tensor* id_embeds,
|
ggml_tensor* id_embeds,
|
||||||
struct ggml_tensor* class_tokens_mask,
|
ggml_tensor* class_tokens_mask,
|
||||||
struct ggml_tensor* class_tokens_mask_pos,
|
ggml_tensor* class_tokens_mask_pos,
|
||||||
struct ggml_tensor* left,
|
ggml_tensor* left,
|
||||||
struct ggml_tensor* right) {
|
ggml_tensor* right) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
|
|
||||||
struct ggml_tensor* valid_id_embeds = id_embeds;
|
ggml_tensor* valid_id_embeds = id_embeds;
|
||||||
// # slice out the image token embeddings
|
// # slice out the image token embeddings
|
||||||
ggml_set_name(class_tokens_mask_pos, "class_tokens_mask_pos");
|
ggml_set_name(class_tokens_mask_pos, "class_tokens_mask_pos");
|
||||||
ggml_set_name(prompt_embeds, "prompt_embeds");
|
ggml_set_name(prompt_embeds, "prompt_embeds");
|
||||||
struct ggml_tensor* image_token_embeds = ggml_get_rows(ctx->ggml_ctx, prompt_embeds, class_tokens_mask_pos);
|
ggml_tensor* image_token_embeds = ggml_get_rows(ctx->ggml_ctx, prompt_embeds, class_tokens_mask_pos);
|
||||||
ggml_set_name(image_token_embeds, "image_token_embeds");
|
ggml_set_name(image_token_embeds, "image_token_embeds");
|
||||||
valid_id_embeds = ggml_reshape_2d(ctx->ggml_ctx, valid_id_embeds, valid_id_embeds->ne[0],
|
valid_id_embeds = ggml_reshape_2d(ctx->ggml_ctx, valid_id_embeds, valid_id_embeds->ne[0],
|
||||||
ggml_nelements(valid_id_embeds) / valid_id_embeds->ne[0]);
|
ggml_nelements(valid_id_embeds) / valid_id_embeds->ne[0]);
|
||||||
struct ggml_tensor* stacked_id_embeds = fuse_fn(ctx, image_token_embeds, valid_id_embeds);
|
ggml_tensor* stacked_id_embeds = fuse_fn(ctx, image_token_embeds, valid_id_embeds);
|
||||||
|
|
||||||
if (left && right) {
|
if (left && right) {
|
||||||
stacked_id_embeds = ggml_concat(ctx->ggml_ctx, left, stacked_id_embeds, 1);
|
stacked_id_embeds = ggml_concat(ctx->ggml_ctx, left, stacked_id_embeds, 1);
|
||||||
@ -304,7 +304,7 @@ public:
|
|||||||
class_tokens_mask = ggml_cont(ctx->ggml_ctx, ggml_transpose(ctx->ggml_ctx, class_tokens_mask));
|
class_tokens_mask = ggml_cont(ctx->ggml_ctx, ggml_transpose(ctx->ggml_ctx, class_tokens_mask));
|
||||||
class_tokens_mask = ggml_repeat(ctx->ggml_ctx, class_tokens_mask, prompt_embeds);
|
class_tokens_mask = ggml_repeat(ctx->ggml_ctx, class_tokens_mask, prompt_embeds);
|
||||||
prompt_embeds = ggml_mul(ctx->ggml_ctx, prompt_embeds, class_tokens_mask);
|
prompt_embeds = ggml_mul(ctx->ggml_ctx, prompt_embeds, class_tokens_mask);
|
||||||
struct ggml_tensor* updated_prompt_embeds = ggml_add(ctx->ggml_ctx, prompt_embeds, stacked_id_embeds);
|
ggml_tensor* updated_prompt_embeds = ggml_add(ctx->ggml_ctx, prompt_embeds, stacked_id_embeds);
|
||||||
ggml_set_name(updated_prompt_embeds, "updated_prompt_embeds");
|
ggml_set_name(updated_prompt_embeds, "updated_prompt_embeds");
|
||||||
return updated_prompt_embeds;
|
return updated_prompt_embeds;
|
||||||
}
|
}
|
||||||
@ -317,22 +317,22 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
|
|||||||
blocks["fuse_module"] = std::shared_ptr<GGMLBlock>(new FuseModule(2048));
|
blocks["fuse_module"] = std::shared_ptr<GGMLBlock>(new FuseModule(2048));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* id_pixel_values,
|
ggml_tensor* id_pixel_values,
|
||||||
struct ggml_tensor* prompt_embeds,
|
ggml_tensor* prompt_embeds,
|
||||||
struct ggml_tensor* class_tokens_mask,
|
ggml_tensor* class_tokens_mask,
|
||||||
struct ggml_tensor* class_tokens_mask_pos,
|
ggml_tensor* class_tokens_mask_pos,
|
||||||
struct ggml_tensor* left,
|
ggml_tensor* left,
|
||||||
struct ggml_tensor* right) {
|
ggml_tensor* right) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
auto vision_model = std::dynamic_pointer_cast<CLIPVisionModel>(blocks["vision_model"]);
|
auto vision_model = std::dynamic_pointer_cast<CLIPVisionModel>(blocks["vision_model"]);
|
||||||
auto visual_projection = std::dynamic_pointer_cast<CLIPProjection>(blocks["visual_projection"]);
|
auto visual_projection = std::dynamic_pointer_cast<CLIPProjection>(blocks["visual_projection"]);
|
||||||
auto visual_projection_2 = std::dynamic_pointer_cast<Linear>(blocks["visual_projection_2"]);
|
auto visual_projection_2 = std::dynamic_pointer_cast<Linear>(blocks["visual_projection_2"]);
|
||||||
auto fuse_module = std::dynamic_pointer_cast<FuseModule>(blocks["fuse_module"]);
|
auto fuse_module = std::dynamic_pointer_cast<FuseModule>(blocks["fuse_module"]);
|
||||||
|
|
||||||
struct ggml_tensor* shared_id_embeds = vision_model->forward(ctx, id_pixel_values); // [N, hidden_size]
|
ggml_tensor* shared_id_embeds = vision_model->forward(ctx, id_pixel_values); // [N, hidden_size]
|
||||||
struct ggml_tensor* id_embeds = visual_projection->forward(ctx, shared_id_embeds); // [N, proj_dim(768)]
|
ggml_tensor* id_embeds = visual_projection->forward(ctx, shared_id_embeds); // [N, proj_dim(768)]
|
||||||
struct ggml_tensor* id_embeds_2 = visual_projection_2->forward(ctx, shared_id_embeds); // [N, 1280]
|
ggml_tensor* id_embeds_2 = visual_projection_2->forward(ctx, shared_id_embeds); // [N, 1280]
|
||||||
|
|
||||||
id_embeds = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, id_embeds, 2, 0, 1, 3));
|
id_embeds = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, id_embeds, 2, 0, 1, 3));
|
||||||
id_embeds_2 = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, id_embeds_2, 2, 0, 1, 3));
|
id_embeds_2 = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, id_embeds_2, 2, 0, 1, 3));
|
||||||
@ -340,7 +340,7 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
|
|||||||
id_embeds = ggml_concat(ctx->ggml_ctx, id_embeds, id_embeds_2, 2); // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
|
id_embeds = ggml_concat(ctx->ggml_ctx, id_embeds, id_embeds_2, 2); // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
|
||||||
id_embeds = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, id_embeds, 1, 2, 0, 3));
|
id_embeds = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, id_embeds, 1, 2, 0, 3));
|
||||||
|
|
||||||
struct ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,
|
ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,
|
||||||
prompt_embeds,
|
prompt_embeds,
|
||||||
id_embeds,
|
id_embeds,
|
||||||
class_tokens_mask,
|
class_tokens_mask,
|
||||||
@ -365,24 +365,24 @@ struct PhotoMakerIDEncoder_CLIPInsightfaceExtendtokenBlock : public CLIPVisionMo
|
|||||||
num_tokens));
|
num_tokens));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* id_pixel_values,
|
ggml_tensor* id_pixel_values,
|
||||||
struct ggml_tensor* prompt_embeds,
|
ggml_tensor* prompt_embeds,
|
||||||
struct ggml_tensor* class_tokens_mask,
|
ggml_tensor* class_tokens_mask,
|
||||||
struct ggml_tensor* class_tokens_mask_pos,
|
ggml_tensor* class_tokens_mask_pos,
|
||||||
struct ggml_tensor* id_embeds,
|
ggml_tensor* id_embeds,
|
||||||
struct ggml_tensor* left,
|
ggml_tensor* left,
|
||||||
struct ggml_tensor* right) {
|
ggml_tensor* right) {
|
||||||
// x: [N, channels, h, w]
|
// x: [N, channels, h, w]
|
||||||
auto vision_model = std::dynamic_pointer_cast<CLIPVisionModel>(blocks["vision_model"]);
|
auto vision_model = std::dynamic_pointer_cast<CLIPVisionModel>(blocks["vision_model"]);
|
||||||
auto fuse_module = std::dynamic_pointer_cast<FuseModule>(blocks["fuse_module"]);
|
auto fuse_module = std::dynamic_pointer_cast<FuseModule>(blocks["fuse_module"]);
|
||||||
auto qformer_perceiver = std::dynamic_pointer_cast<QFormerPerceiver>(blocks["qformer_perceiver"]);
|
auto qformer_perceiver = std::dynamic_pointer_cast<QFormerPerceiver>(blocks["qformer_perceiver"]);
|
||||||
|
|
||||||
// struct ggml_tensor* last_hidden_state = vision_model->forward(ctx, id_pixel_values); // [N, hidden_size]
|
// ggml_tensor* last_hidden_state = vision_model->forward(ctx, id_pixel_values); // [N, hidden_size]
|
||||||
struct ggml_tensor* last_hidden_state = vision_model->forward(ctx, id_pixel_values, false); // [N, hidden_size]
|
ggml_tensor* last_hidden_state = vision_model->forward(ctx, id_pixel_values, false); // [N, hidden_size]
|
||||||
id_embeds = qformer_perceiver->forward(ctx, id_embeds, last_hidden_state);
|
id_embeds = qformer_perceiver->forward(ctx, id_embeds, last_hidden_state);
|
||||||
|
|
||||||
struct ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,
|
ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,
|
||||||
prompt_embeds,
|
prompt_embeds,
|
||||||
id_embeds,
|
id_embeds,
|
||||||
class_tokens_mask,
|
class_tokens_mask,
|
||||||
@ -436,18 +436,18 @@ public:
|
|||||||
return pm_version;
|
return pm_version;
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
if (pm_version == PM_VERSION_1)
|
if (pm_version == PM_VERSION_1)
|
||||||
id_encoder.get_param_tensors(tensors, prefix);
|
id_encoder.get_param_tensors(tensors, prefix);
|
||||||
else if (pm_version == PM_VERSION_2)
|
else if (pm_version == PM_VERSION_2)
|
||||||
id_encoder2.get_param_tensors(tensors, prefix);
|
id_encoder2.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph( // struct ggml_allocr* allocr,
|
ggml_cgraph* build_graph( // ggml_allocr* allocr,
|
||||||
struct ggml_tensor* id_pixel_values,
|
ggml_tensor* id_pixel_values,
|
||||||
struct ggml_tensor* prompt_embeds,
|
ggml_tensor* prompt_embeds,
|
||||||
std::vector<bool>& class_tokens_mask,
|
std::vector<bool>& class_tokens_mask,
|
||||||
struct ggml_tensor* id_embeds) {
|
ggml_tensor* id_embeds) {
|
||||||
ctm.clear();
|
ctm.clear();
|
||||||
ctmf16.clear();
|
ctmf16.clear();
|
||||||
ctmpos.clear();
|
ctmpos.clear();
|
||||||
@ -458,20 +458,20 @@ public:
|
|||||||
|
|
||||||
auto runner_ctx = get_context();
|
auto runner_ctx = get_context();
|
||||||
|
|
||||||
struct ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
ggml_cgraph* gf = ggml_new_graph(compute_ctx);
|
||||||
|
|
||||||
int64_t hidden_size = prompt_embeds->ne[0];
|
int64_t hidden_size = prompt_embeds->ne[0];
|
||||||
int64_t seq_length = prompt_embeds->ne[1];
|
int64_t seq_length = prompt_embeds->ne[1];
|
||||||
ggml_type type = GGML_TYPE_F32;
|
ggml_type type = GGML_TYPE_F32;
|
||||||
|
|
||||||
struct ggml_tensor* class_tokens_mask_d = ggml_new_tensor_1d(runner_ctx.ggml_ctx, type, class_tokens_mask.size());
|
ggml_tensor* class_tokens_mask_d = ggml_new_tensor_1d(runner_ctx.ggml_ctx, type, class_tokens_mask.size());
|
||||||
|
|
||||||
struct ggml_tensor* id_pixel_values_d = to_backend(id_pixel_values);
|
ggml_tensor* id_pixel_values_d = to_backend(id_pixel_values);
|
||||||
struct ggml_tensor* prompt_embeds_d = to_backend(prompt_embeds);
|
ggml_tensor* prompt_embeds_d = to_backend(prompt_embeds);
|
||||||
struct ggml_tensor* id_embeds_d = to_backend(id_embeds);
|
ggml_tensor* id_embeds_d = to_backend(id_embeds);
|
||||||
|
|
||||||
struct ggml_tensor* left = nullptr;
|
ggml_tensor* left = nullptr;
|
||||||
struct ggml_tensor* right = nullptr;
|
ggml_tensor* right = nullptr;
|
||||||
for (int i = 0; i < class_tokens_mask.size(); i++) {
|
for (int i = 0; i < class_tokens_mask.size(); i++) {
|
||||||
if (class_tokens_mask[i]) {
|
if (class_tokens_mask[i]) {
|
||||||
// printf(" 1,");
|
// printf(" 1,");
|
||||||
@ -495,7 +495,7 @@ public:
|
|||||||
right = ggml_new_tensor_3d(runner_ctx.ggml_ctx, type,
|
right = ggml_new_tensor_3d(runner_ctx.ggml_ctx, type,
|
||||||
hidden_size, seq_length - ctmpos[ctmpos.size() - 1] - 1, 1);
|
hidden_size, seq_length - ctmpos[ctmpos.size() - 1] - 1, 1);
|
||||||
}
|
}
|
||||||
struct ggml_tensor* class_tokens_mask_pos = ggml_new_tensor_1d(runner_ctx.ggml_ctx, GGML_TYPE_I32, ctmpos.size());
|
ggml_tensor* class_tokens_mask_pos = ggml_new_tensor_1d(runner_ctx.ggml_ctx, GGML_TYPE_I32, ctmpos.size());
|
||||||
|
|
||||||
{
|
{
|
||||||
if (type == GGML_TYPE_F16)
|
if (type == GGML_TYPE_F16)
|
||||||
@ -526,7 +526,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
struct ggml_tensor* updated_prompt_embeds = nullptr;
|
ggml_tensor* updated_prompt_embeds = nullptr;
|
||||||
if (pm_version == PM_VERSION_1)
|
if (pm_version == PM_VERSION_1)
|
||||||
updated_prompt_embeds = id_encoder.forward(&runner_ctx,
|
updated_prompt_embeds = id_encoder.forward(&runner_ctx,
|
||||||
id_pixel_values_d,
|
id_pixel_values_d,
|
||||||
@ -548,25 +548,25 @@ public:
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(const int n_threads,
|
bool compute(const int n_threads,
|
||||||
struct ggml_tensor* id_pixel_values,
|
ggml_tensor* id_pixel_values,
|
||||||
struct ggml_tensor* prompt_embeds,
|
ggml_tensor* prompt_embeds,
|
||||||
struct ggml_tensor* id_embeds,
|
ggml_tensor* id_embeds,
|
||||||
std::vector<bool>& class_tokens_mask,
|
std::vector<bool>& class_tokens_mask,
|
||||||
struct ggml_tensor** updated_prompt_embeds,
|
ggml_tensor** updated_prompt_embeds,
|
||||||
ggml_context* output_ctx) {
|
ggml_context* output_ctx) {
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
// return build_graph(compute_allocr, id_pixel_values, prompt_embeds, class_tokens_mask);
|
// return build_graph(compute_allocr, id_pixel_values, prompt_embeds, class_tokens_mask);
|
||||||
return build_graph(id_pixel_values, prompt_embeds, class_tokens_mask, id_embeds);
|
return build_graph(id_pixel_values, prompt_embeds, class_tokens_mask, id_embeds);
|
||||||
};
|
};
|
||||||
|
|
||||||
// GGMLRunner::compute(get_graph, n_threads, updated_prompt_embeds);
|
// GGMLRunner::compute(get_graph, n_threads, updated_prompt_embeds);
|
||||||
GGMLRunner::compute(get_graph, n_threads, true, updated_prompt_embeds, output_ctx);
|
return GGMLRunner::compute(get_graph, n_threads, true, updated_prompt_embeds, output_ctx);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PhotoMakerIDEmbed : public GGMLRunner {
|
struct PhotoMakerIDEmbed : public GGMLRunner {
|
||||||
std::map<std::string, struct ggml_tensor*> tensors;
|
std::map<std::string, ggml_tensor*> tensors;
|
||||||
std::string file_path;
|
std::string file_path;
|
||||||
ModelLoader* model_loader;
|
ModelLoader* model_loader;
|
||||||
bool load_failed = false;
|
bool load_failed = false;
|
||||||
@ -606,7 +606,7 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||||||
}
|
}
|
||||||
if (dry_run) {
|
if (dry_run) {
|
||||||
std::lock_guard<std::mutex> lock(tensor_mutex);
|
std::lock_guard<std::mutex> lock(tensor_mutex);
|
||||||
struct ggml_tensor* real = ggml_new_tensor(params_ctx,
|
ggml_tensor* real = ggml_new_tensor(params_ctx,
|
||||||
tensor_storage.type,
|
tensor_storage.type,
|
||||||
tensor_storage.n_dims,
|
tensor_storage.n_dims,
|
||||||
tensor_storage.ne);
|
tensor_storage.ne);
|
||||||
@ -629,8 +629,8 @@ struct PhotoMakerIDEmbed : public GGMLRunner {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* get() {
|
ggml_tensor* get() {
|
||||||
std::map<std::string, struct ggml_tensor*>::iterator pos;
|
std::map<std::string, ggml_tensor*>::iterator pos;
|
||||||
pos = tensors.find("pmid.id_embeds");
|
pos = tensors.find("pmid.id_embeds");
|
||||||
if (pos != tensors.end())
|
if (pos != tensors.end())
|
||||||
return pos->second;
|
return pos->second;
|
||||||
@ -2,15 +2,15 @@
|
|||||||
#define __PREPROCESSING_HPP__
|
#define __PREPROCESSING_HPP__
|
||||||
|
|
||||||
#include "ggml_extend.hpp"
|
#include "ggml_extend.hpp"
|
||||||
#define M_PI_ 3.14159265358979323846
|
#define M_PI_ 3.14159265358979323846f
|
||||||
|
|
||||||
void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
|
void convolve(ggml_tensor* input, ggml_tensor* output, ggml_tensor* kernel, int padding) {
|
||||||
struct ggml_init_params params;
|
ggml_init_params params;
|
||||||
params.mem_size = 80 * input->ne[0] * input->ne[1]; // 20M for 512x512
|
params.mem_size = 80 * input->ne[0] * input->ne[1]; // 20M for 512x512
|
||||||
params.mem_buffer = nullptr;
|
params.mem_buffer = nullptr;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
struct ggml_context* ctx0 = ggml_init(params);
|
ggml_context* ctx0 = ggml_init(params);
|
||||||
struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
|
ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
|
||||||
ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
|
ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
|
||||||
ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
|
ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
|
||||||
ggml_cgraph* gf = ggml_new_graph(ctx0);
|
ggml_cgraph* gf = ggml_new_graph(ctx0);
|
||||||
@ -19,21 +19,21 @@ void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml
|
|||||||
ggml_free(ctx0);
|
ggml_free(ctx0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gaussian_kernel(struct ggml_tensor* kernel) {
|
void gaussian_kernel(ggml_tensor* kernel) {
|
||||||
int ks_mid = kernel->ne[0] / 2;
|
int ks_mid = static_cast<int>(kernel->ne[0] / 2);
|
||||||
float sigma = 1.4f;
|
float sigma = 1.4f;
|
||||||
float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
|
float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
|
||||||
for (int y = 0; y < kernel->ne[0]; y++) {
|
for (int y = 0; y < kernel->ne[0]; y++) {
|
||||||
float gx = -ks_mid + y;
|
float gx = static_cast<float>(-ks_mid + y);
|
||||||
for (int x = 0; x < kernel->ne[1]; x++) {
|
for (int x = 0; x < kernel->ne[1]; x++) {
|
||||||
float gy = -ks_mid + x;
|
float gy = static_cast<float>(-ks_mid + x);
|
||||||
float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
|
float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
|
||||||
ggml_ext_tensor_set_f32(kernel, k_, x, y);
|
ggml_ext_tensor_set_f32(kernel, k_, x, y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) {
|
void grayscale(ggml_tensor* rgb_img, ggml_tensor* grayscale) {
|
||||||
for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
|
for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
|
||||||
for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
|
for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
|
||||||
float r = ggml_ext_tensor_get_f32(rgb_img, ix, iy);
|
float r = ggml_ext_tensor_get_f32(rgb_img, ix, iy);
|
||||||
@ -45,8 +45,8 @@ void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
|
void prop_hypot(ggml_tensor* x, ggml_tensor* y, ggml_tensor* h) {
|
||||||
int n_elements = ggml_nelements(h);
|
int n_elements = static_cast<int>(ggml_nelements(h));
|
||||||
float* dx = (float*)x->data;
|
float* dx = (float*)x->data;
|
||||||
float* dy = (float*)y->data;
|
float* dy = (float*)y->data;
|
||||||
float* dh = (float*)h->data;
|
float* dh = (float*)h->data;
|
||||||
@ -55,8 +55,8 @@ void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
|
void prop_arctan2(ggml_tensor* x, ggml_tensor* y, ggml_tensor* h) {
|
||||||
int n_elements = ggml_nelements(h);
|
int n_elements = static_cast<int>(ggml_nelements(h));
|
||||||
float* dx = (float*)x->data;
|
float* dx = (float*)x->data;
|
||||||
float* dy = (float*)y->data;
|
float* dy = (float*)y->data;
|
||||||
float* dh = (float*)h->data;
|
float* dh = (float*)h->data;
|
||||||
@ -65,8 +65,8 @@ void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tens
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void normalize_tensor(struct ggml_tensor* g) {
|
void normalize_tensor(ggml_tensor* g) {
|
||||||
int n_elements = ggml_nelements(g);
|
int n_elements = static_cast<int>(ggml_nelements(g));
|
||||||
float* dg = (float*)g->data;
|
float* dg = (float*)g->data;
|
||||||
float max = -INFINITY;
|
float max = -INFINITY;
|
||||||
for (int i = 0; i < n_elements; i++) {
|
for (int i = 0; i < n_elements; i++) {
|
||||||
@ -78,7 +78,7 @@ void normalize_tensor(struct ggml_tensor* g) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) {
|
void non_max_supression(ggml_tensor* result, ggml_tensor* G, ggml_tensor* D) {
|
||||||
for (int iy = 1; iy < result->ne[1] - 1; iy++) {
|
for (int iy = 1; iy < result->ne[1] - 1; iy++) {
|
||||||
for (int ix = 1; ix < result->ne[0] - 1; ix++) {
|
for (int ix = 1; ix < result->ne[0] - 1; ix++) {
|
||||||
float angle = ggml_ext_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
|
float angle = ggml_ext_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
|
||||||
@ -117,8 +117,8 @@ void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
|
void threshold_hystersis(ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
|
||||||
int n_elements = ggml_nelements(img);
|
int n_elements = static_cast<int>(ggml_nelements(img));
|
||||||
float* imd = (float*)img->data;
|
float* imd = (float*)img->data;
|
||||||
float max = -INFINITY;
|
float max = -INFINITY;
|
||||||
for (int i = 0; i < n_elements; i++) {
|
for (int i = 0; i < n_elements; i++) {
|
||||||
@ -163,11 +163,11 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
||||||
struct ggml_init_params params;
|
ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(40 * img.width * img.height); // 10MB for 512x512
|
params.mem_size = static_cast<size_t>(40 * img.width * img.height); // 10MB for 512x512
|
||||||
params.mem_buffer = nullptr;
|
params.mem_buffer = nullptr;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
struct ggml_context* work_ctx = ggml_init(params);
|
ggml_context* work_ctx = ggml_init(params);
|
||||||
|
|
||||||
if (!work_ctx) {
|
if (!work_ctx) {
|
||||||
LOG_ERROR("ggml_init() failed");
|
LOG_ERROR("ggml_init() failed");
|
||||||
@ -186,18 +186,18 @@ bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold,
|
|||||||
|
|
||||||
// generate kernel
|
// generate kernel
|
||||||
int kernel_size = 5;
|
int kernel_size = 5;
|
||||||
struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
|
ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
|
||||||
struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
||||||
memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
|
memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
|
||||||
struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
|
||||||
memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
|
memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
|
||||||
gaussian_kernel(gkernel);
|
gaussian_kernel(gkernel);
|
||||||
struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 3, 1);
|
ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 3, 1);
|
||||||
struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 1, 1);
|
ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, img.width, img.height, 1, 1);
|
||||||
struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
|
ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
|
ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
|
ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray);
|
ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray);
|
||||||
sd_image_to_ggml_tensor(img, image);
|
sd_image_to_ggml_tensor(img, image);
|
||||||
grayscale(image, image_gray);
|
grayscale(image, image_gray);
|
||||||
convolve(image_gray, image_gray, gkernel, 2);
|
convolve(image_gray, image_gray, gkernel, 2);
|
||||||
@ -209,8 +209,8 @@ bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold,
|
|||||||
non_max_supression(image_gray, G, tetha);
|
non_max_supression(image_gray, G, tetha);
|
||||||
threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
|
threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
|
||||||
// to RGB channels
|
// to RGB channels
|
||||||
for (int iy = 0; iy < img.height; iy++) {
|
for (uint32_t iy = 0; iy < img.height; iy++) {
|
||||||
for (int ix = 0; ix < img.width; ix++) {
|
for (uint32_t ix = 0; ix < img.width; ix++) {
|
||||||
float gray = ggml_ext_tensor_get_f32(image_gray, ix, iy);
|
float gray = ggml_ext_tensor_get_f32(image_gray, ix, iy);
|
||||||
gray = inverse ? 1.0f - gray : gray;
|
gray = inverse ? 1.0f - gray : gray;
|
||||||
ggml_ext_tensor_set_f32(image, gray, ix, iy);
|
ggml_ext_tensor_set_f32(image, gray, ix, iy);
|
||||||
@ -3,9 +3,8 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "common.hpp"
|
#include "common_block.hpp"
|
||||||
#include "flux.hpp"
|
#include "flux.hpp"
|
||||||
#include "ggml_extend.hpp"
|
|
||||||
|
|
||||||
namespace Qwen {
|
namespace Qwen {
|
||||||
constexpr int QWEN_IMAGE_GRAPH_SIZE = 20480;
|
constexpr int QWEN_IMAGE_GRAPH_SIZE = 20480;
|
||||||
@ -27,9 +26,9 @@ namespace Qwen {
|
|||||||
blocks["linear_2"] = std::shared_ptr<GGMLBlock>(new Linear(time_embed_dim, out_dim, sample_proj_bias));
|
blocks["linear_2"] = std::shared_ptr<GGMLBlock>(new Linear(time_embed_dim, out_dim, sample_proj_bias));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* sample,
|
ggml_tensor* sample,
|
||||||
struct ggml_tensor* condition = nullptr) {
|
ggml_tensor* condition = nullptr) {
|
||||||
if (condition != nullptr) {
|
if (condition != nullptr) {
|
||||||
auto cond_proj = std::dynamic_pointer_cast<Linear>(blocks["cond_proj"]);
|
auto cond_proj = std::dynamic_pointer_cast<Linear>(blocks["cond_proj"]);
|
||||||
sample = ggml_add(ctx->ggml_ctx, sample, cond_proj->forward(ctx, condition));
|
sample = ggml_add(ctx->ggml_ctx, sample, cond_proj->forward(ctx, condition));
|
||||||
@ -50,8 +49,8 @@ namespace Qwen {
|
|||||||
blocks["timestep_embedder"] = std::shared_ptr<GGMLBlock>(new TimestepEmbedding(256, embedding_dim));
|
blocks["timestep_embedder"] = std::shared_ptr<GGMLBlock>(new TimestepEmbedding(256, embedding_dim));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* timesteps) {
|
ggml_tensor* timesteps) {
|
||||||
// timesteps: [N,]
|
// timesteps: [N,]
|
||||||
// return: [N, embedding_dim]
|
// return: [N, embedding_dim]
|
||||||
auto timestep_embedder = std::dynamic_pointer_cast<TimestepEmbedding>(blocks["timestep_embedder"]);
|
auto timestep_embedder = std::dynamic_pointer_cast<TimestepEmbedding>(blocks["timestep_embedder"]);
|
||||||
@ -108,10 +107,10 @@ namespace Qwen {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::pair<ggml_tensor*, ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
std::pair<ggml_tensor*, ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* img,
|
ggml_tensor* img,
|
||||||
struct ggml_tensor* txt,
|
ggml_tensor* txt,
|
||||||
struct ggml_tensor* pe,
|
ggml_tensor* pe,
|
||||||
struct ggml_tensor* mask = nullptr) {
|
ggml_tensor* mask = nullptr) {
|
||||||
// img: [N, n_img_token, hidden_size]
|
// img: [N, n_img_token, hidden_size]
|
||||||
// txt: [N, n_txt_token, hidden_size]
|
// txt: [N, n_txt_token, hidden_size]
|
||||||
// pe: [n_img_token + n_txt_token, d_head/2, 2, 2]
|
// pe: [n_img_token + n_txt_token, d_head/2, 2, 2]
|
||||||
@ -163,25 +162,24 @@ namespace Qwen {
|
|||||||
auto v = ggml_concat(ctx->ggml_ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
auto v = ggml_concat(ctx->ggml_ctx, txt_v, img_v, 2); // [N, n_txt_token + n_img_token, n_head, d_head]
|
||||||
|
|
||||||
auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
|
auto attn = Rope::attention(ctx, q, k, v, pe, mask, (1.0f / 128.f)); // [N, n_txt_token + n_img_token, n_head*d_head]
|
||||||
attn = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, attn, 0, 2, 1, 3)); // [n_txt_token + n_img_token, N, hidden_size]
|
|
||||||
auto txt_attn_out = ggml_view_3d(ctx->ggml_ctx,
|
auto txt_attn_out = ggml_view_3d(ctx->ggml_ctx,
|
||||||
attn,
|
attn,
|
||||||
attn->ne[0],
|
attn->ne[0],
|
||||||
attn->ne[1],
|
|
||||||
txt->ne[1],
|
txt->ne[1],
|
||||||
|
attn->ne[2],
|
||||||
attn->nb[1],
|
attn->nb[1],
|
||||||
attn->nb[2],
|
attn->nb[2],
|
||||||
0); // [n_txt_token, N, hidden_size]
|
0); // [N, n_txt_token, n_head*d_head]
|
||||||
txt_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, txt_attn_out, 0, 2, 1, 3)); // [N, n_txt_token, hidden_size]
|
|
||||||
auto img_attn_out = ggml_view_3d(ctx->ggml_ctx,
|
auto img_attn_out = ggml_view_3d(ctx->ggml_ctx,
|
||||||
attn,
|
attn,
|
||||||
attn->ne[0],
|
attn->ne[0],
|
||||||
attn->ne[1],
|
|
||||||
img->ne[1],
|
img->ne[1],
|
||||||
|
attn->ne[2],
|
||||||
attn->nb[1],
|
attn->nb[1],
|
||||||
attn->nb[2],
|
attn->nb[2],
|
||||||
attn->nb[2] * txt->ne[1]); // [n_img_token, N, hidden_size]
|
txt->ne[1] * attn->nb[1]); // [N, n_img_token, n_head*d_head]
|
||||||
img_attn_out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, img_attn_out, 0, 2, 1, 3)); // [N, n_img_token, hidden_size]
|
img_attn_out = ggml_cont(ctx->ggml_ctx, img_attn_out);
|
||||||
|
txt_attn_out = ggml_cont(ctx->ggml_ctx, txt_attn_out);
|
||||||
|
|
||||||
img_attn_out = to_out_0->forward(ctx, img_attn_out);
|
img_attn_out = to_out_0->forward(ctx, img_attn_out);
|
||||||
txt_attn_out = to_add_out->forward(ctx, txt_attn_out);
|
txt_attn_out = to_add_out->forward(ctx, txt_attn_out);
|
||||||
@ -191,11 +189,16 @@ namespace Qwen {
|
|||||||
};
|
};
|
||||||
|
|
||||||
class QwenImageTransformerBlock : public GGMLBlock {
|
class QwenImageTransformerBlock : public GGMLBlock {
|
||||||
|
protected:
|
||||||
|
bool zero_cond_t;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
QwenImageTransformerBlock(int64_t dim,
|
QwenImageTransformerBlock(int64_t dim,
|
||||||
int64_t num_attention_heads,
|
int64_t num_attention_heads,
|
||||||
int64_t attention_head_dim,
|
int64_t attention_head_dim,
|
||||||
float eps = 1e-6) {
|
float eps = 1e-6,
|
||||||
|
bool zero_cond_t = false)
|
||||||
|
: zero_cond_t(zero_cond_t) {
|
||||||
// img_mod.0 is nn.SiLU()
|
// img_mod.0 is nn.SiLU()
|
||||||
blocks["img_mod.1"] = std::shared_ptr<GGMLBlock>(new Linear(dim, 6 * dim, true));
|
blocks["img_mod.1"] = std::shared_ptr<GGMLBlock>(new Linear(dim, 6 * dim, true));
|
||||||
|
|
||||||
@ -208,7 +211,7 @@ namespace Qwen {
|
|||||||
|
|
||||||
blocks["txt_norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
|
blocks["txt_norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
|
||||||
blocks["txt_norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
|
blocks["txt_norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim, eps, false));
|
||||||
blocks["txt_mlp"] = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim, 4, FeedForward::Activation::GELU));
|
blocks["txt_mlp"] = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim, 4, FeedForward::Activation::GELU, true));
|
||||||
|
|
||||||
blocks["attn"] = std::shared_ptr<GGMLBlock>(new QwenImageAttention(dim,
|
blocks["attn"] = std::shared_ptr<GGMLBlock>(new QwenImageAttention(dim,
|
||||||
attention_head_dim,
|
attention_head_dim,
|
||||||
@ -220,11 +223,37 @@ namespace Qwen {
|
|||||||
eps));
|
eps));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<ggml_tensor*> get_mod_params_vec(ggml_context* ctx, ggml_tensor* mod_params, ggml_tensor* index = nullptr) {
|
||||||
|
// index: [N, n_img_token]
|
||||||
|
// mod_params: [N, hidden_size * 12]
|
||||||
|
if (index == nullptr) {
|
||||||
|
return ggml_ext_chunk(ctx, mod_params, 6, 0);
|
||||||
|
}
|
||||||
|
mod_params = ggml_reshape_1d(ctx, mod_params, ggml_nelements(mod_params));
|
||||||
|
auto mod_params_vec = ggml_ext_chunk(ctx, mod_params, 12, 0);
|
||||||
|
index = ggml_reshape_3d(ctx, index, 1, index->ne[0], index->ne[1]); // [N, n_img_token, 1]
|
||||||
|
index = ggml_repeat_4d(ctx, index, mod_params_vec[0]->ne[0], index->ne[1], index->ne[2], index->ne[3]); // [N, n_img_token, hidden_size]
|
||||||
|
std::vector<ggml_tensor*> mod_results;
|
||||||
|
for (int i = 0; i < 6; i++) {
|
||||||
|
auto mod_0 = mod_params_vec[i];
|
||||||
|
auto mod_1 = mod_params_vec[i + 6];
|
||||||
|
|
||||||
|
// mod_result = torch.where(index == 0, mod_0, mod_1)
|
||||||
|
// mod_result = (1 - index)*mod_0 + index*mod_1
|
||||||
|
mod_0 = ggml_sub(ctx, ggml_repeat(ctx, mod_0, index), ggml_mul(ctx, index, mod_0)); // [N, n_img_token, hidden_size]
|
||||||
|
mod_1 = ggml_mul(ctx, index, mod_1); // [N, n_img_token, hidden_size]
|
||||||
|
auto mod_result = ggml_add(ctx, mod_0, mod_1);
|
||||||
|
mod_results.push_back(mod_result);
|
||||||
|
}
|
||||||
|
return mod_results;
|
||||||
|
}
|
||||||
|
|
||||||
virtual std::pair<ggml_tensor*, ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
virtual std::pair<ggml_tensor*, ggml_tensor*> forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* img,
|
ggml_tensor* img,
|
||||||
struct ggml_tensor* txt,
|
ggml_tensor* txt,
|
||||||
struct ggml_tensor* t_emb,
|
ggml_tensor* t_emb,
|
||||||
struct ggml_tensor* pe) {
|
ggml_tensor* pe,
|
||||||
|
ggml_tensor* modulate_index = nullptr) {
|
||||||
// img: [N, n_img_token, hidden_size]
|
// img: [N, n_img_token, hidden_size]
|
||||||
// txt: [N, n_txt_token, hidden_size]
|
// txt: [N, n_txt_token, hidden_size]
|
||||||
// pe: [n_img_token + n_txt_token, d_head/2, 2, 2]
|
// pe: [n_img_token + n_txt_token, d_head/2, 2, 2]
|
||||||
@ -244,14 +273,18 @@ namespace Qwen {
|
|||||||
|
|
||||||
auto img_mod_params = ggml_silu(ctx->ggml_ctx, t_emb);
|
auto img_mod_params = ggml_silu(ctx->ggml_ctx, t_emb);
|
||||||
img_mod_params = img_mod_1->forward(ctx, img_mod_params);
|
img_mod_params = img_mod_1->forward(ctx, img_mod_params);
|
||||||
auto img_mod_param_vec = ggml_ext_chunk(ctx->ggml_ctx, img_mod_params, 6, 0);
|
auto img_mod_param_vec = get_mod_params_vec(ctx->ggml_ctx, img_mod_params, modulate_index);
|
||||||
|
|
||||||
|
if (zero_cond_t) {
|
||||||
|
t_emb = ggml_ext_chunk(ctx->ggml_ctx, t_emb, 2, 1)[0];
|
||||||
|
}
|
||||||
|
|
||||||
auto txt_mod_params = ggml_silu(ctx->ggml_ctx, t_emb);
|
auto txt_mod_params = ggml_silu(ctx->ggml_ctx, t_emb);
|
||||||
txt_mod_params = txt_mod_1->forward(ctx, txt_mod_params);
|
txt_mod_params = txt_mod_1->forward(ctx, txt_mod_params);
|
||||||
auto txt_mod_param_vec = ggml_ext_chunk(ctx->ggml_ctx, txt_mod_params, 6, 0);
|
auto txt_mod_param_vec = get_mod_params_vec(ctx->ggml_ctx, txt_mod_params);
|
||||||
|
|
||||||
auto img_normed = img_norm1->forward(ctx, img);
|
auto img_normed = img_norm1->forward(ctx, img);
|
||||||
auto img_modulated = Flux::modulate(ctx->ggml_ctx, img_normed, img_mod_param_vec[0], img_mod_param_vec[1]);
|
auto img_modulated = Flux::modulate(ctx->ggml_ctx, img_normed, img_mod_param_vec[0], img_mod_param_vec[1], modulate_index != nullptr);
|
||||||
auto img_gate1 = img_mod_param_vec[2];
|
auto img_gate1 = img_mod_param_vec[2];
|
||||||
|
|
||||||
auto txt_normed = txt_norm1->forward(ctx, txt);
|
auto txt_normed = txt_norm1->forward(ctx, txt);
|
||||||
@ -264,7 +297,7 @@ namespace Qwen {
|
|||||||
txt = ggml_add(ctx->ggml_ctx, txt, ggml_mul(ctx->ggml_ctx, txt_attn_output, txt_gate1));
|
txt = ggml_add(ctx->ggml_ctx, txt, ggml_mul(ctx->ggml_ctx, txt_attn_output, txt_gate1));
|
||||||
|
|
||||||
auto img_normed2 = img_norm2->forward(ctx, img);
|
auto img_normed2 = img_norm2->forward(ctx, img);
|
||||||
auto img_modulated2 = Flux::modulate(ctx->ggml_ctx, img_normed2, img_mod_param_vec[3], img_mod_param_vec[4]);
|
auto img_modulated2 = Flux::modulate(ctx->ggml_ctx, img_normed2, img_mod_param_vec[3], img_mod_param_vec[4], modulate_index != nullptr);
|
||||||
auto img_gate2 = img_mod_param_vec[5];
|
auto img_gate2 = img_mod_param_vec[5];
|
||||||
|
|
||||||
auto txt_normed2 = txt_norm2->forward(ctx, txt);
|
auto txt_normed2 = txt_norm2->forward(ctx, txt);
|
||||||
@ -292,9 +325,9 @@ namespace Qwen {
|
|||||||
blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(conditioning_embedding_dim, embedding_dim * 2, bias));
|
blocks["linear"] = std::shared_ptr<GGMLBlock>(new Linear(conditioning_embedding_dim, embedding_dim * 2, bias));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* c) {
|
ggml_tensor* c) {
|
||||||
// x: [N, n_token, hidden_size]
|
// x: [N, n_token, hidden_size]
|
||||||
// c: [N, hidden_size]
|
// c: [N, hidden_size]
|
||||||
// return: [N, n_token, patch_size * patch_size * out_channels]
|
// return: [N, n_token, patch_size * patch_size * out_channels]
|
||||||
@ -315,16 +348,17 @@ namespace Qwen {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct QwenImageParams {
|
struct QwenImageParams {
|
||||||
int64_t patch_size = 2;
|
int patch_size = 2;
|
||||||
int64_t in_channels = 64;
|
int64_t in_channels = 64;
|
||||||
int64_t out_channels = 16;
|
int64_t out_channels = 16;
|
||||||
int64_t num_layers = 60;
|
int num_layers = 60;
|
||||||
int64_t attention_head_dim = 128;
|
int64_t attention_head_dim = 128;
|
||||||
int64_t num_attention_heads = 24;
|
int64_t num_attention_heads = 24;
|
||||||
int64_t joint_attention_dim = 3584;
|
int64_t joint_attention_dim = 3584;
|
||||||
float theta = 10000;
|
int theta = 10000;
|
||||||
std::vector<int> axes_dim = {16, 56, 56};
|
std::vector<int> axes_dim = {16, 56, 56};
|
||||||
int64_t axes_dim_sum = 128;
|
int axes_dim_sum = 128;
|
||||||
|
bool zero_cond_t = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
class QwenImageModel : public GGMLBlock {
|
class QwenImageModel : public GGMLBlock {
|
||||||
@ -346,7 +380,8 @@ namespace Qwen {
|
|||||||
auto block = std::shared_ptr<GGMLBlock>(new QwenImageTransformerBlock(inner_dim,
|
auto block = std::shared_ptr<GGMLBlock>(new QwenImageTransformerBlock(inner_dim,
|
||||||
params.num_attention_heads,
|
params.num_attention_heads,
|
||||||
params.attention_head_dim,
|
params.attention_head_dim,
|
||||||
1e-6f));
|
1e-6f,
|
||||||
|
params.zero_cond_t));
|
||||||
blocks["transformer_blocks." + std::to_string(i)] = block;
|
blocks["transformer_blocks." + std::to_string(i)] = block;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -354,74 +389,12 @@ namespace Qwen {
|
|||||||
blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, params.patch_size * params.patch_size * params.out_channels));
|
blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, params.patch_size * params.patch_size * params.out_channels));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* pad_to_patch_size(struct ggml_context* ctx,
|
ggml_tensor* forward_orig(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x) {
|
ggml_tensor* x,
|
||||||
int64_t W = x->ne[0];
|
ggml_tensor* timestep,
|
||||||
int64_t H = x->ne[1];
|
ggml_tensor* context,
|
||||||
|
ggml_tensor* pe,
|
||||||
int pad_h = (params.patch_size - H % params.patch_size) % params.patch_size;
|
ggml_tensor* modulate_index = nullptr) {
|
||||||
int pad_w = (params.patch_size - W % params.patch_size) % params.patch_size;
|
|
||||||
x = ggml_pad(ctx, x, pad_w, pad_h, 0, 0); // [N, C, H + pad_h, W + pad_w]
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_tensor* patchify(struct ggml_context* ctx,
|
|
||||||
struct ggml_tensor* x) {
|
|
||||||
// x: [N, C, H, W]
|
|
||||||
// return: [N, h*w, C * patch_size * patch_size]
|
|
||||||
int64_t N = x->ne[3];
|
|
||||||
int64_t C = x->ne[2];
|
|
||||||
int64_t H = x->ne[1];
|
|
||||||
int64_t W = x->ne[0];
|
|
||||||
int64_t p = params.patch_size;
|
|
||||||
int64_t h = H / params.patch_size;
|
|
||||||
int64_t w = W / params.patch_size;
|
|
||||||
|
|
||||||
GGML_ASSERT(h * p == H && w * p == W);
|
|
||||||
|
|
||||||
x = ggml_reshape_4d(ctx, x, p, w, p, h * C * N); // [N*C*h, p, w, p]
|
|
||||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N*C*h, w, p, p]
|
|
||||||
x = ggml_reshape_4d(ctx, x, p * p, w * h, C, N); // [N, C, h*w, p*p]
|
|
||||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, h*w, C, p*p]
|
|
||||||
x = ggml_reshape_3d(ctx, x, p * p * C, w * h, N); // [N, h*w, C*p*p]
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_tensor* process_img(struct ggml_context* ctx,
|
|
||||||
struct ggml_tensor* x) {
|
|
||||||
x = pad_to_patch_size(ctx, x);
|
|
||||||
x = patchify(ctx, x);
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_tensor* unpatchify(struct ggml_context* ctx,
|
|
||||||
struct ggml_tensor* x,
|
|
||||||
int64_t h,
|
|
||||||
int64_t w) {
|
|
||||||
// x: [N, h*w, C*patch_size*patch_size]
|
|
||||||
// return: [N, C, H, W]
|
|
||||||
int64_t N = x->ne[2];
|
|
||||||
int64_t C = x->ne[0] / params.patch_size / params.patch_size;
|
|
||||||
int64_t H = h * params.patch_size;
|
|
||||||
int64_t W = w * params.patch_size;
|
|
||||||
int64_t p = params.patch_size;
|
|
||||||
|
|
||||||
GGML_ASSERT(C * p * p == x->ne[0]);
|
|
||||||
|
|
||||||
x = ggml_reshape_4d(ctx, x, p * p, C, w * h, N); // [N, h*w, C, p*p]
|
|
||||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N, C, h*w, p*p]
|
|
||||||
x = ggml_reshape_4d(ctx, x, p, p, w, h * C * N); // [N*C*h, w, p, p]
|
|
||||||
x = ggml_cont(ctx, ggml_permute(ctx, x, 0, 2, 1, 3)); // [N*C*h, p, w, p]
|
|
||||||
x = ggml_reshape_4d(ctx, x, W, H, C, N); // [N, C, h*p, w*p]
|
|
||||||
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ggml_tensor* forward_orig(GGMLRunnerContext* ctx,
|
|
||||||
struct ggml_tensor* x,
|
|
||||||
struct ggml_tensor* timestep,
|
|
||||||
struct ggml_tensor* context,
|
|
||||||
struct ggml_tensor* pe) {
|
|
||||||
auto time_text_embed = std::dynamic_pointer_cast<QwenTimestepProjEmbeddings>(blocks["time_text_embed"]);
|
auto time_text_embed = std::dynamic_pointer_cast<QwenTimestepProjEmbeddings>(blocks["time_text_embed"]);
|
||||||
auto txt_norm = std::dynamic_pointer_cast<RMSNorm>(blocks["txt_norm"]);
|
auto txt_norm = std::dynamic_pointer_cast<RMSNorm>(blocks["txt_norm"]);
|
||||||
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
|
auto img_in = std::dynamic_pointer_cast<Linear>(blocks["img_in"]);
|
||||||
@ -430,6 +403,10 @@ namespace Qwen {
|
|||||||
auto proj_out = std::dynamic_pointer_cast<Linear>(blocks["proj_out"]);
|
auto proj_out = std::dynamic_pointer_cast<Linear>(blocks["proj_out"]);
|
||||||
|
|
||||||
auto t_emb = time_text_embed->forward(ctx, timestep);
|
auto t_emb = time_text_embed->forward(ctx, timestep);
|
||||||
|
if (params.zero_cond_t) {
|
||||||
|
auto t_emb_0 = time_text_embed->forward(ctx, ggml_ext_zeros_like(ctx->ggml_ctx, timestep));
|
||||||
|
t_emb = ggml_concat(ctx->ggml_ctx, t_emb, t_emb_0, 1);
|
||||||
|
}
|
||||||
auto img = img_in->forward(ctx, x);
|
auto img = img_in->forward(ctx, x);
|
||||||
auto txt = txt_norm->forward(ctx, context);
|
auto txt = txt_norm->forward(ctx, context);
|
||||||
txt = txt_in->forward(ctx, txt);
|
txt = txt_in->forward(ctx, txt);
|
||||||
@ -437,23 +414,28 @@ namespace Qwen {
|
|||||||
for (int i = 0; i < params.num_layers; i++) {
|
for (int i = 0; i < params.num_layers; i++) {
|
||||||
auto block = std::dynamic_pointer_cast<QwenImageTransformerBlock>(blocks["transformer_blocks." + std::to_string(i)]);
|
auto block = std::dynamic_pointer_cast<QwenImageTransformerBlock>(blocks["transformer_blocks." + std::to_string(i)]);
|
||||||
|
|
||||||
auto result = block->forward(ctx, img, txt, t_emb, pe);
|
auto result = block->forward(ctx, img, txt, t_emb, pe, modulate_index);
|
||||||
img = result.first;
|
img = result.first;
|
||||||
txt = result.second;
|
txt = result.second;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (params.zero_cond_t) {
|
||||||
|
t_emb = ggml_ext_chunk(ctx->ggml_ctx, t_emb, 2, 1)[0];
|
||||||
|
}
|
||||||
|
|
||||||
img = norm_out->forward(ctx, img, t_emb);
|
img = norm_out->forward(ctx, img, t_emb);
|
||||||
img = proj_out->forward(ctx, img);
|
img = proj_out->forward(ctx, img);
|
||||||
|
|
||||||
return img;
|
return img;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor* forward(GGMLRunnerContext* ctx,
|
ggml_tensor* forward(GGMLRunnerContext* ctx,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* timestep,
|
ggml_tensor* timestep,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
struct ggml_tensor* pe,
|
ggml_tensor* pe,
|
||||||
std::vector<ggml_tensor*> ref_latents = {}) {
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
|
ggml_tensor* modulate_index = nullptr) {
|
||||||
// Forward pass of DiT.
|
// Forward pass of DiT.
|
||||||
// x: [N, C, H, W]
|
// x: [N, C, H, W]
|
||||||
// timestep: [N,]
|
// timestep: [N,]
|
||||||
@ -466,20 +448,17 @@ namespace Qwen {
|
|||||||
int64_t C = x->ne[2];
|
int64_t C = x->ne[2];
|
||||||
int64_t N = x->ne[3];
|
int64_t N = x->ne[3];
|
||||||
|
|
||||||
auto img = process_img(ctx->ggml_ctx, x);
|
auto img = DiT::pad_and_patchify(ctx, x, params.patch_size, params.patch_size);
|
||||||
uint64_t img_tokens = img->ne[1];
|
int64_t img_tokens = img->ne[1];
|
||||||
|
|
||||||
if (ref_latents.size() > 0) {
|
if (ref_latents.size() > 0) {
|
||||||
for (ggml_tensor* ref : ref_latents) {
|
for (ggml_tensor* ref : ref_latents) {
|
||||||
ref = process_img(ctx->ggml_ctx, ref);
|
ref = DiT::pad_and_patchify(ctx, ref, params.patch_size, params.patch_size);
|
||||||
img = ggml_concat(ctx->ggml_ctx, img, ref, 1);
|
img = ggml_concat(ctx->ggml_ctx, img, ref, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t h_len = ((H + (params.patch_size / 2)) / params.patch_size);
|
auto out = forward_orig(ctx, img, timestep, context, pe, modulate_index); // [N, h_len*w_len, ph*pw*C]
|
||||||
int64_t w_len = ((W + (params.patch_size / 2)) / params.patch_size);
|
|
||||||
|
|
||||||
auto out = forward_orig(ctx, img, timestep, context, pe); // [N, h_len*w_len, ph*pw*C]
|
|
||||||
|
|
||||||
if (out->ne[1] > img_tokens) {
|
if (out->ne[1] > img_tokens) {
|
||||||
out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
|
out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, out, 0, 2, 1, 3)); // [num_tokens, N, C * patch_size * patch_size]
|
||||||
@ -487,11 +466,7 @@ namespace Qwen {
|
|||||||
out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
|
out = ggml_cont(ctx->ggml_ctx, ggml_permute(ctx->ggml_ctx, out, 0, 2, 1, 3)); // [N, h*w, C * patch_size * patch_size]
|
||||||
}
|
}
|
||||||
|
|
||||||
out = unpatchify(ctx->ggml_ctx, out, h_len, w_len); // [N, C, H + pad_h, W + pad_w]
|
out = DiT::unpatchify_and_crop(ctx->ggml_ctx, out, H, W, params.patch_size, params.patch_size); // [N, C, H, W]
|
||||||
|
|
||||||
// slice
|
|
||||||
out = ggml_ext_slice(ctx->ggml_ctx, out, 1, 0, H); // [N, C, H, W + pad_w]
|
|
||||||
out = ggml_ext_slice(ctx->ggml_ctx, out, 0, 0, W); // [N, C, H, W]
|
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@ -502,19 +477,25 @@ namespace Qwen {
|
|||||||
QwenImageParams qwen_image_params;
|
QwenImageParams qwen_image_params;
|
||||||
QwenImageModel qwen_image;
|
QwenImageModel qwen_image;
|
||||||
std::vector<float> pe_vec;
|
std::vector<float> pe_vec;
|
||||||
|
std::vector<float> modulate_index_vec;
|
||||||
SDVersion version;
|
SDVersion version;
|
||||||
|
|
||||||
QwenImageRunner(ggml_backend_t backend,
|
QwenImageRunner(ggml_backend_t backend,
|
||||||
bool offload_params_to_cpu,
|
bool offload_params_to_cpu,
|
||||||
const String2TensorStorage& tensor_storage_map = {},
|
const String2TensorStorage& tensor_storage_map = {},
|
||||||
const std::string prefix = "",
|
const std::string prefix = "",
|
||||||
SDVersion version = VERSION_QWEN_IMAGE)
|
SDVersion version = VERSION_QWEN_IMAGE,
|
||||||
|
bool zero_cond_t = false)
|
||||||
: GGMLRunner(backend, offload_params_to_cpu) {
|
: GGMLRunner(backend, offload_params_to_cpu) {
|
||||||
qwen_image_params.num_layers = 0;
|
qwen_image_params.num_layers = 0;
|
||||||
|
qwen_image_params.zero_cond_t = zero_cond_t;
|
||||||
for (auto pair : tensor_storage_map) {
|
for (auto pair : tensor_storage_map) {
|
||||||
std::string tensor_name = pair.first;
|
std::string tensor_name = pair.first;
|
||||||
if (tensor_name.find(prefix) == std::string::npos)
|
if (tensor_name.find(prefix) == std::string::npos)
|
||||||
continue;
|
continue;
|
||||||
|
if (tensor_name.find("__index_timestep_zero__") != std::string::npos) {
|
||||||
|
qwen_image_params.zero_cond_t = true;
|
||||||
|
}
|
||||||
size_t pos = tensor_name.find("transformer_blocks.");
|
size_t pos = tensor_name.find("transformer_blocks.");
|
||||||
if (pos != std::string::npos) {
|
if (pos != std::string::npos) {
|
||||||
tensor_name = tensor_name.substr(pos); // remove prefix
|
tensor_name = tensor_name.substr(pos); // remove prefix
|
||||||
@ -529,6 +510,9 @@ namespace Qwen {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG_INFO("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
|
LOG_INFO("qwen_image_params.num_layers: %ld", qwen_image_params.num_layers);
|
||||||
|
if (qwen_image_params.zero_cond_t) {
|
||||||
|
LOG_INFO("use zero_cond_t");
|
||||||
|
}
|
||||||
qwen_image = QwenImageModel(qwen_image_params);
|
qwen_image = QwenImageModel(qwen_image_params);
|
||||||
qwen_image.init(params_ctx, tensor_storage_map, prefix);
|
qwen_image.init(params_ctx, tensor_storage_map, prefix);
|
||||||
}
|
}
|
||||||
@ -537,17 +521,17 @@ namespace Qwen {
|
|||||||
return "qwen_image";
|
return "qwen_image";
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors, const std::string prefix) {
|
void get_param_tensors(std::map<std::string, ggml_tensor*>& tensors, const std::string prefix) {
|
||||||
qwen_image.get_param_tensors(tensors, prefix);
|
qwen_image.get_param_tensors(tensors, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph* build_graph(struct ggml_tensor* x,
|
ggml_cgraph* build_graph(ggml_tensor* x,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
bool increase_ref_index = false) {
|
bool increase_ref_index = false) {
|
||||||
GGML_ASSERT(x->ne[3] == 1);
|
GGML_ASSERT(x->ne[3] == 1);
|
||||||
struct ggml_cgraph* gf = new_graph_custom(QWEN_IMAGE_GRAPH_SIZE);
|
ggml_cgraph* gf = new_graph_custom(QWEN_IMAGE_GRAPH_SIZE);
|
||||||
|
|
||||||
x = to_backend(x);
|
x = to_backend(x);
|
||||||
context = to_backend(context);
|
context = to_backend(context);
|
||||||
@ -557,16 +541,18 @@ namespace Qwen {
|
|||||||
ref_latents[i] = to_backend(ref_latents[i]);
|
ref_latents[i] = to_backend(ref_latents[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
pe_vec = Rope::gen_qwen_image_pe(x->ne[1],
|
pe_vec = Rope::gen_qwen_image_pe(static_cast<int>(x->ne[1]),
|
||||||
x->ne[0],
|
static_cast<int>(x->ne[0]),
|
||||||
qwen_image_params.patch_size,
|
qwen_image_params.patch_size,
|
||||||
x->ne[3],
|
static_cast<int>(x->ne[3]),
|
||||||
context->ne[1],
|
static_cast<int>(context->ne[1]),
|
||||||
ref_latents,
|
ref_latents,
|
||||||
increase_ref_index,
|
increase_ref_index,
|
||||||
qwen_image_params.theta,
|
qwen_image_params.theta,
|
||||||
|
circular_y_enabled,
|
||||||
|
circular_x_enabled,
|
||||||
qwen_image_params.axes_dim);
|
qwen_image_params.axes_dim);
|
||||||
int pos_len = pe_vec.size() / qwen_image_params.axes_dim_sum / 2;
|
int pos_len = static_cast<int>(pe_vec.size() / qwen_image_params.axes_dim_sum / 2);
|
||||||
// LOG_DEBUG("pos_len %d", pos_len);
|
// LOG_DEBUG("pos_len %d", pos_len);
|
||||||
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, qwen_image_params.axes_dim_sum / 2, pos_len);
|
auto pe = ggml_new_tensor_4d(compute_ctx, GGML_TYPE_F32, 2, 2, qwen_image_params.axes_dim_sum / 2, pos_len);
|
||||||
// pe->data = pe_vec.data();
|
// pe->data = pe_vec.data();
|
||||||
@ -574,45 +560,71 @@ namespace Qwen {
|
|||||||
// pe->data = nullptr;
|
// pe->data = nullptr;
|
||||||
set_backend_tensor_data(pe, pe_vec.data());
|
set_backend_tensor_data(pe, pe_vec.data());
|
||||||
|
|
||||||
|
ggml_tensor* modulate_index = nullptr;
|
||||||
|
if (qwen_image_params.zero_cond_t) {
|
||||||
|
modulate_index_vec.clear();
|
||||||
|
|
||||||
|
int64_t h_len = ((x->ne[1] + (qwen_image_params.patch_size / 2)) / qwen_image_params.patch_size);
|
||||||
|
int64_t w_len = ((x->ne[0] + (qwen_image_params.patch_size / 2)) / qwen_image_params.patch_size);
|
||||||
|
int64_t num_img_tokens = h_len * w_len;
|
||||||
|
|
||||||
|
modulate_index_vec.insert(modulate_index_vec.end(), num_img_tokens, 0.f);
|
||||||
|
int64_t num_ref_img_tokens = 0;
|
||||||
|
for (ggml_tensor* ref : ref_latents) {
|
||||||
|
int64_t h_len = ((ref->ne[1] + (qwen_image_params.patch_size / 2)) / qwen_image_params.patch_size);
|
||||||
|
int64_t w_len = ((ref->ne[0] + (qwen_image_params.patch_size / 2)) / qwen_image_params.patch_size);
|
||||||
|
|
||||||
|
num_ref_img_tokens += h_len * w_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_ref_img_tokens > 0) {
|
||||||
|
modulate_index_vec.insert(modulate_index_vec.end(), num_ref_img_tokens, 1.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
modulate_index = ggml_new_tensor_1d(compute_ctx, GGML_TYPE_F32, modulate_index_vec.size());
|
||||||
|
set_backend_tensor_data(modulate_index, modulate_index_vec.data());
|
||||||
|
}
|
||||||
|
|
||||||
auto runner_ctx = get_context();
|
auto runner_ctx = get_context();
|
||||||
|
|
||||||
struct ggml_tensor* out = qwen_image.forward(&runner_ctx,
|
ggml_tensor* out = qwen_image.forward(&runner_ctx,
|
||||||
x,
|
x,
|
||||||
timesteps,
|
timesteps,
|
||||||
context,
|
context,
|
||||||
pe,
|
pe,
|
||||||
ref_latents);
|
ref_latents,
|
||||||
|
modulate_index);
|
||||||
|
|
||||||
ggml_build_forward_expand(gf, out);
|
ggml_build_forward_expand(gf, out);
|
||||||
|
|
||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute(int n_threads,
|
bool compute(int n_threads,
|
||||||
struct ggml_tensor* x,
|
ggml_tensor* x,
|
||||||
struct ggml_tensor* timesteps,
|
ggml_tensor* timesteps,
|
||||||
struct ggml_tensor* context,
|
ggml_tensor* context,
|
||||||
std::vector<ggml_tensor*> ref_latents = {},
|
std::vector<ggml_tensor*> ref_latents = {},
|
||||||
bool increase_ref_index = false,
|
bool increase_ref_index = false,
|
||||||
struct ggml_tensor** output = nullptr,
|
ggml_tensor** output = nullptr,
|
||||||
struct ggml_context* output_ctx = nullptr) {
|
ggml_context* output_ctx = nullptr) {
|
||||||
// x: [N, in_channels, h, w]
|
// x: [N, in_channels, h, w]
|
||||||
// timesteps: [N, ]
|
// timesteps: [N, ]
|
||||||
// context: [N, max_position, hidden_size]
|
// context: [N, max_position, hidden_size]
|
||||||
auto get_graph = [&]() -> struct ggml_cgraph* {
|
auto get_graph = [&]() -> ggml_cgraph* {
|
||||||
return build_graph(x, timesteps, context, ref_latents, increase_ref_index);
|
return build_graph(x, timesteps, context, ref_latents, increase_ref_index);
|
||||||
};
|
};
|
||||||
|
|
||||||
GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
return GGMLRunner::compute(get_graph, n_threads, false, output, output_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void test() {
|
void test() {
|
||||||
struct ggml_init_params params;
|
ggml_init_params params;
|
||||||
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1GB
|
params.mem_size = static_cast<size_t>(1024 * 1024) * 1024; // 1GB
|
||||||
params.mem_buffer = nullptr;
|
params.mem_buffer = nullptr;
|
||||||
params.no_alloc = false;
|
params.no_alloc = false;
|
||||||
|
|
||||||
struct ggml_context* work_ctx = ggml_init(params);
|
ggml_context* work_ctx = ggml_init(params);
|
||||||
GGML_ASSERT(work_ctx != nullptr);
|
GGML_ASSERT(work_ctx != nullptr);
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -629,14 +641,14 @@ namespace Qwen {
|
|||||||
auto context = load_tensor_from_file(work_ctx, "./qwen_image_context.bin");
|
auto context = load_tensor_from_file(work_ctx, "./qwen_image_context.bin");
|
||||||
print_ggml_tensor(context);
|
print_ggml_tensor(context);
|
||||||
|
|
||||||
struct ggml_tensor* out = nullptr;
|
ggml_tensor* out = nullptr;
|
||||||
|
|
||||||
int t0 = ggml_time_ms();
|
int64_t t0 = ggml_time_ms();
|
||||||
compute(8, x, timesteps, context, {}, false, &out, work_ctx);
|
compute(8, x, timesteps, context, {}, false, &out, work_ctx);
|
||||||
int t1 = ggml_time_ms();
|
int64_t t1 = ggml_time_ms();
|
||||||
|
|
||||||
print_ggml_tensor(out);
|
print_ggml_tensor(out);
|
||||||
LOG_DEBUG("qwen_image test done in %dms", t1 - t0);
|
LOG_DEBUG("qwen_image test done in %lldms", t1 - t0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,7 +90,7 @@ class MT19937RNG : public RNG {
|
|||||||
float u1 = 1.0f - data[j];
|
float u1 = 1.0f - data[j];
|
||||||
float u2 = data[j + 8];
|
float u2 = data[j + 8];
|
||||||
float r = std::sqrt(-2.0f * std::log(u1));
|
float r = std::sqrt(-2.0f * std::log(u1));
|
||||||
float theta = 2.0f * 3.14159265358979323846 * u2;
|
float theta = 2.0f * 3.14159265358979323846f * u2;
|
||||||
data[j] = r * std::cos(theta) * std + mean;
|
data[j] = r * std::cos(theta) * std + mean;
|
||||||
data[j + 8] = r * std::sin(theta) * std + mean;
|
data[j + 8] = r * std::sin(theta) * std + mean;
|
||||||
}
|
}
|
||||||