feat: add OpenCL backend support (#680)

This commit is contained in:
rmatif 2025-06-30 17:32:23 +02:00 committed by GitHub
parent 0d8b39f0ba
commit d42fd59464
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 102 additions and 7 deletions

View File

@ -28,6 +28,7 @@ option(SD_CUDA "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF)
option(SD_VULKAN "sd: vulkan backend" OFF)
option(SD_OPENCL "sd: opencl backend" OFF)
option(SD_SYCL "sd: sycl backend" OFF)
option(SD_MUSA "sd: musa backend" OFF)
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
@ -52,6 +53,12 @@ if (SD_VULKAN)
add_definitions(-DSD_USE_VULKAN)
endif ()
if (SD_OPENCL)
message("-- Use OpenCL as backend stable-diffusion")
set(GGML_OPENCL ON)
add_definitions(-DSD_USE_OPENCL)
endif ()
if (SD_HIPBLAS)
message("-- Use HIPBLAS as backend stable-diffusion")
set(GGML_HIP ON)

View File

@ -22,7 +22,7 @@ Inference of Stable Diffusion and Flux in pure C/C++
- Accelerated memory-efficient CPU inference
- Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image, enabling Flash Attention just requires ~1.8GB.
- AVX, AVX2 and AVX512 support for x86 architectures
- Full CUDA, Metal, Vulkan and SYCL backend for GPU acceleration.
- Full CUDA, Metal, Vulkan, OpenCL and SYCL backend for GPU acceleration.
- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs models
- No need to convert to `.ggml` or `.gguf` anymore!
- Flash Attention for memory usage optimization
@ -160,6 +160,73 @@ cmake .. -DSD_VULKAN=ON
cmake --build . --config Release
```
##### Using OpenCL (for Adreno GPU)
Currently, it supports only Adreno GPUs and is primarily optimized for Q4_0 type
To build for Windows ARM please refers to [Windows 11 Arm64
](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64)
Building for Android:
Android NDK:
Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
Setup OpenCL Dependencies for NDK:
You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
* OpenCL Headers:
```bash
# In a temporary working directory
git clone https://github.com/KhronosGroup/OpenCL-Headers
cd OpenCL-Headers
# Replace <YOUR_NDK_PATH> with your actual NDK installation path
# e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
cd ..
```
* OpenCL ICD Loader:
```bash
# In the same temporary working directory
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
cd OpenCL-ICD-Loader
mkdir build_ndk && cd build_ndk
# Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
-DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=24 \
-DANDROID_STL=c++_shared
ninja
# Replace <YOUR_NDK_PATH>
# e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
cd ../..
```
Build `stable-diffusion.cpp` for Android with OpenCL:
```bash
mkdir build-android && cd build-android
# Replace <YOUR_NDK_PATH> with your actual NDK installation path
# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
cmake .. -G Ninja \
-DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-28 \
-DGGML_OPENMP=OFF \
-DSD_OPENCL=ON
ninja
```
*(Note: Don't forget to include `LD_LIBRARY_PATH=/vendor/lib64` in your command line before running the binary)*
##### Using SYCL
Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before start. More details and steps can refer to [llama.cpp SYCL backend](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md#linux).

View File

@ -56,7 +56,7 @@ public:
// x: [N, channels, h, w]
auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
x = ggml_upscale(ctx, x, 2); // [N, channels, h*2, w*2]
x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST); // [N, channels, h*2, w*2]
x = conv->forward(ctx, x); // [N, out_channels, h*2, w*2]
return x;
}

View File

@ -130,8 +130,8 @@ public:
body_feat = conv_body->forward(ctx, body_feat);
feat = ggml_add(ctx, feat, body_feat);
// upsample
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2)));
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2)));
feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
return out;
}

2
ggml

@ -1 +1 @@
Subproject commit ff9052988b76e137bcf92bb335733933ca196ac0
Subproject commit 9e4bee1c5afc2d677a5b32ecb90cbdb483e81fff

View File

@ -39,6 +39,10 @@
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif
#ifdef SD_USE_SYCL
#include "ggml-sycl.h"
#endif
@ -113,7 +117,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
a->ne[0] * b->ne[0],
a->ne[1] * b->ne[1],
a->ne[2] * b->ne[2],
a->ne[3] * b->ne[3]),
a->ne[3] * b->ne[3],
GGML_SCALE_MODE_NEAREST),
b);
}

View File

@ -26,6 +26,10 @@
#include "ggml-vulkan.h"
#endif
#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif
#define ST_HEADER_SIZE_LEN 8
uint64_t read_u64(uint8_t* buffer) {

View File

@ -181,6 +181,14 @@ public:
LOG_WARN("Failed to initialize Vulkan backend");
}
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
// ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs
backend = ggml_backend_opencl_init();
if (!backend) {
LOG_WARN("Failed to initialize OpenCL backend");
}
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);

View File

@ -149,7 +149,7 @@ public:
if (i == 1) {
h = ggml_relu_inplace(ctx, h);
} else {
h = ggml_upscale(ctx, h, 2);
h = ggml_upscale(ctx, h, 2, GGML_SCALE_MODE_NEAREST);
}
continue;
}

View File

@ -28,6 +28,10 @@ struct UpscalerGGML {
LOG_DEBUG("Using Vulkan backend");
backend = ggml_backend_vk_init(0);
#endif
#ifdef SD_USE_OPENCL
LOG_DEBUG("Using OpenCL backend");
backend = ggml_backend_opencl_init();
#endif
#ifdef SD_USE_SYCL
LOG_DEBUG("Using SYCL backend");
backend = ggml_backend_sycl_init(0);