Mirror of https://github.com/leejet/stable-diffusion.cpp.git (synced 2025-12-13 05:48:56 +00:00)

Compare commits: 5 commits, d6c87dce5c ... 23de7fc44a
| Author | SHA1 | Date |
|---|---|---|
|  | 23de7fc44a |  |
|  | d42fd59464 |  |
|  | 0d8b39f0ba |  |
|  | 539b5b9374 |  |
|  | b1fc16b504 |  |
CMakeLists.txt

@@ -28,6 +28,7 @@ option(SD_CUDA "sd: cuda backend" OFF)
 option(SD_HIPBLAS "sd: rocm backend" OFF)
 option(SD_METAL "sd: metal backend" OFF)
 option(SD_VULKAN "sd: vulkan backend" OFF)
+option(SD_OPENCL "sd: opencl backend" OFF)
 option(SD_SYCL "sd: sycl backend" OFF)
 option(SD_MUSA "sd: musa backend" OFF)
 option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
@@ -52,6 +53,12 @@ if (SD_VULKAN)
     add_definitions(-DSD_USE_VULKAN)
 endif ()
 
+if (SD_OPENCL)
+    message("-- Use OpenCL as backend stable-diffusion")
+    set(GGML_OPENCL ON)
+    add_definitions(-DSD_USE_OPENCL)
+endif ()
+
 if (SD_HIPBLAS)
     message("-- Use HIPBLAS as backend stable-diffusion")
     set(GGML_HIP ON)
Dockerfile.musa

@@ -2,14 +2,17 @@ ARG MUSA_VERSION=rc3.1.1
 
 FROM mthreads/musa:${MUSA_VERSION}-devel-ubuntu22.04 as build
 
-RUN apt-get update && apt-get install -y cmake
+RUN apt-get update && apt-get install -y ccache cmake git
 
 WORKDIR /sd.cpp
 
 COPY . .
 
 RUN mkdir build && cd build && \
-    cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
+        -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS} -fopenmp -I/usr/lib/llvm-14/lib/clang/14.0.0/include -L/usr/lib/llvm-14/lib" \
+        -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -fopenmp -I/usr/lib/llvm-14/lib/clang/14.0.0/include -L/usr/lib/llvm-14/lib" \
+        -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release && \
     cmake --build . --config Release
 
 FROM mthreads/musa:${MUSA_VERSION}-runtime-ubuntu22.04 as runtime
README.md (69 changed lines)

@@ -22,7 +22,7 @@ Inference of Stable Diffusion and Flux in pure C/C++
 - Accelerated memory-efficient CPU inference
     - Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image, enabling Flash Attention just requires ~1.8GB.
 - AVX, AVX2 and AVX512 support for x86 architectures
-- Full CUDA, Metal, Vulkan and SYCL backend for GPU acceleration.
+- Full CUDA, Metal, Vulkan, OpenCL and SYCL backend for GPU acceleration.
 - Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs models
     - No need to convert to `.ggml` or `.gguf` anymore!
 - Flash Attention for memory usage optimization
@@ -160,6 +160,73 @@ cmake .. -DSD_VULKAN=ON
 cmake --build . --config Release
 ```
 
+##### Using OpenCL (for Adreno GPU)
+
+Currently, it supports only Adreno GPUs and is primarily optimized for the Q4_0 type.
+
+To build for Windows ARM, please refer to [Windows 11 Arm64](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64).
+
+Building for Android:
+
+Android NDK:
+Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
+
+Setup OpenCL Dependencies for NDK:
+
+You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
+
+* OpenCL Headers:
+```bash
+# In a temporary working directory
+git clone https://github.com/KhronosGroup/OpenCL-Headers
+cd OpenCL-Headers
+# Replace <YOUR_NDK_PATH> with your actual NDK installation path
+# e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+cd ..
+```
+
+* OpenCL ICD Loader:
+```bash
+# In the same temporary working directory
+git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+cd OpenCL-ICD-Loader
+mkdir build_ndk && cd build_ndk
+
+# Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
+cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
+  -DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=24 \
+  -DANDROID_STL=c++_shared
+
+ninja
+# Replace <YOUR_NDK_PATH>
+# e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+cd ../..
+```
+
+Build `stable-diffusion.cpp` for Android with OpenCL:
+
+```bash
+mkdir build-android && cd build-android
+
+# Replace <YOUR_NDK_PATH> with your actual NDK installation path
+# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
+cmake .. -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=android-28 \
+  -DGGML_OPENMP=OFF \
+  -DSD_OPENCL=ON
+
+ninja
+```
+*(Note: Don't forget to include `LD_LIBRARY_PATH=/vendor/lib64` in your command line before running the binary)*
+
 ##### Using SYCL
 
 Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and the [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before you start. More details and steps can be found in the [llama.cpp SYCL backend](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md#linux) documentation.
clip.hpp (12 changed lines)

@@ -678,8 +678,8 @@ public:
     bool with_final_ln = true;
 
     CLIPTextModel(CLIPVersion version = OPENAI_CLIP_VIT_L_14,
-                  int clip_skip_value = -1,
-                  bool with_final_ln = true)
+                  bool with_final_ln = true,
+                  int clip_skip_value = -1)
         : version(version), with_final_ln(with_final_ln) {
         if (version == OPEN_CLIP_VIT_H_14) {
             hidden_size = 1024;
@@ -701,7 +701,7 @@ public:
 
     void set_clip_skip(int skip) {
         if (skip <= 0) {
-            return;
+            skip = -1;
         }
         clip_skip = skip;
     }
@@ -871,9 +871,9 @@ struct CLIPTextModelRunner : public GGMLRunner {
                         std::map<std::string, enum ggml_type>& tensor_types,
                         const std::string prefix,
                         CLIPVersion version = OPENAI_CLIP_VIT_L_14,
-                        int clip_skip_value = 1,
-                        bool with_final_ln = true)
-        : GGMLRunner(backend), model(version, clip_skip_value, with_final_ln) {
+                        bool with_final_ln = true,
+                        int clip_skip_value = -1)
+        : GGMLRunner(backend), model(version, with_final_ln, clip_skip_value) {
         model.init(params_ctx, tensor_types, prefix);
     }
 
common.hpp

@@ -56,7 +56,7 @@ public:
         // x: [N, channels, h, w]
         auto conv = std::dynamic_pointer_cast<Conv2d>(blocks["conv"]);
 
-        x = ggml_upscale(ctx, x, 2);  // [N, channels, h*2, w*2]
+        x = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST);  // [N, channels, h*2, w*2]
         x = conv->forward(ctx, x);  // [N, out_channels, h*2, w*2]
         return x;
     }
conditioner.hpp

@@ -63,23 +63,24 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
                                       PMVersion pv = PM_VERSION_1,
                                       int clip_skip = -1)
         : version(version), pm_version(pv), tokenizer(sd_version_is_sd2(version) ? 0 : 49407), embd_dir(embd_dir) {
+        if (sd_version_is_sd1(version)) {
+            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14);
+        } else if (sd_version_is_sd2(version)) {
+            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14);
+        } else if (sd_version_is_sdxl(version)) {
+            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
+            text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
+        }
+        set_clip_skip(clip_skip);
+    }
+
+    void set_clip_skip(int clip_skip) {
         if (clip_skip <= 0) {
             clip_skip = 1;
             if (sd_version_is_sd2(version) || sd_version_is_sdxl(version)) {
                 clip_skip = 2;
             }
         }
-        if (sd_version_is_sd1(version)) {
-            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip);
-        } else if (sd_version_is_sd2(version)) {
-            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPEN_CLIP_VIT_H_14, clip_skip);
-        } else if (sd_version_is_sdxl(version)) {
-            text_model = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
-            text_model2 = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "cond_stage_model.1.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
-        }
-    }
-
-    void set_clip_skip(int clip_skip) {
         text_model->set_clip_skip(clip_skip);
         if (sd_version_is_sdxl(version)) {
             text_model2->set_clip_skip(clip_skip);
@@ -665,15 +666,16 @@ struct SD3CLIPEmbedder : public Conditioner {
                    std::map<std::string, enum ggml_type>& tensor_types,
                    int clip_skip = -1)
         : clip_g_tokenizer(0) {
-        if (clip_skip <= 0) {
-            clip_skip = 2;
-        }
-        clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, false);
-        clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, clip_skip, false);
+        clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, false);
+        clip_g = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_g.transformer.text_model", OPEN_CLIP_VIT_BIGG_14, false);
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
+        set_clip_skip(clip_skip);
     }
 
     void set_clip_skip(int clip_skip) {
+        if (clip_skip <= 0) {
+            clip_skip = 2;
+        }
         clip_l->set_clip_skip(clip_skip);
         clip_g->set_clip_skip(clip_skip);
     }
@@ -1010,14 +1012,15 @@ struct FluxCLIPEmbedder : public Conditioner {
     FluxCLIPEmbedder(ggml_backend_t backend,
                      std::map<std::string, enum ggml_type>& tensor_types,
                      int clip_skip = -1) {
-        if (clip_skip <= 0) {
-            clip_skip = 2;
-        }
-        clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, clip_skip, true);
+        clip_l = std::make_shared<CLIPTextModelRunner>(backend, tensor_types, "text_encoders.clip_l.transformer.text_model", OPENAI_CLIP_VIT_L_14, true);
         t5 = std::make_shared<T5Runner>(backend, tensor_types, "text_encoders.t5xxl.transformer");
+        set_clip_skip(clip_skip);
     }
 
     void set_clip_skip(int clip_skip) {
+        if (clip_skip <= 0) {
+            clip_skip = 2;
+        }
         clip_l->set_clip_skip(clip_skip);
     }
 
@@ -1422,4 +1425,4 @@ struct PixArtCLIPEmbedder : public Conditioner {
     }
 };
 
 #endif
esrgan.hpp

@@ -130,8 +130,8 @@ public:
         body_feat = conv_body->forward(ctx, body_feat);
         feat      = ggml_add(ctx, feat, body_feat);
         // upsample
-        feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2)));
-        feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2)));
+        feat = lrelu(ctx, conv_up1->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
+        feat = lrelu(ctx, conv_up2->forward(ctx, ggml_upscale(ctx, feat, 2, GGML_SCALE_MODE_NEAREST)));
         auto out = conv_last->forward(ctx, lrelu(ctx, conv_hr->forward(ctx, feat)));
         return out;
     }
ggml (submodule, 2 changed lines)

@@ -1 +1 @@
-Subproject commit ff9052988b76e137bcf92bb335733933ca196ac0
+Subproject commit 9e4bee1c5afc2d677a5b32ecb90cbdb483e81fff
ggml_extend.hpp

@@ -39,6 +39,10 @@
 #include "ggml-vulkan.h"
 #endif
 
+#ifdef SD_USE_OPENCL
+#include "ggml-opencl.h"
+#endif
+
 #ifdef SD_USE_SYCL
 #include "ggml-sycl.h"
 #endif
@@ -113,7 +117,8 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_kronecker(ggml_context* ctx, struct g
                                      a->ne[0] * b->ne[0],
                                      a->ne[1] * b->ne[1],
                                      a->ne[2] * b->ne[2],
-                                     a->ne[3] * b->ne[3]),
+                                     a->ne[3] * b->ne[3],
+                                     GGML_SCALE_MODE_NEAREST),
                     b);
 }
 
lora.hpp (2 changed lines)

@@ -3,7 +3,7 @@
 
 #include "ggml_extend.hpp"
 
-#define LORA_GRAPH_SIZE 10240
+#define LORA_GRAPH_SIZE 15360
 
 struct LoraModel : public GGMLRunner {
     enum lora_t {
model.cpp

@@ -26,6 +26,10 @@
 #include "ggml-vulkan.h"
 #endif
 
+#ifdef SD_USE_OPENCL
+#include "ggml-opencl.h"
+#endif
+
 #define ST_HEADER_SIZE_LEN 8
 
 uint64_t read_u64(uint8_t* buffer) {
stable-diffusion.cpp

@@ -181,6 +181,14 @@ public:
             LOG_WARN("Failed to initialize Vulkan backend");
         }
 #endif
+#ifdef SD_USE_OPENCL
+        LOG_DEBUG("Using OpenCL backend");
+        // ggml_log_set(ggml_log_callback_default, nullptr); // Optional ggml logs
+        backend = ggml_backend_opencl_init();
+        if (!backend) {
+            LOG_WARN("Failed to initialize OpenCL backend");
+        }
+#endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");
         backend = ggml_backend_sycl_init(0);
@@ -276,10 +284,10 @@ public:
             model_loader.set_wtype_override(GGML_TYPE_F32, "vae.");
         }
 
-        LOG_INFO("Weight type: %s", model_wtype != SD_TYPE_COUNT ? ggml_type_name(model_wtype) : "??");
-        LOG_INFO("Conditioner weight type: %s", conditioner_wtype != SD_TYPE_COUNT ? ggml_type_name(conditioner_wtype) : "??");
-        LOG_INFO("Diffusion model weight type: %s", diffusion_model_wtype != SD_TYPE_COUNT ? ggml_type_name(diffusion_model_wtype) : "??");
-        LOG_INFO("VAE weight type: %s", vae_wtype != SD_TYPE_COUNT ? ggml_type_name(vae_wtype) : "??");
+        LOG_INFO("Weight type: %s", model_wtype != GGML_TYPE_COUNT ? ggml_type_name(model_wtype) : "??");
+        LOG_INFO("Conditioner weight type: %s", conditioner_wtype != GGML_TYPE_COUNT ? ggml_type_name(conditioner_wtype) : "??");
+        LOG_INFO("Diffusion model weight type: %s", diffusion_model_wtype != GGML_TYPE_COUNT ? ggml_type_name(diffusion_model_wtype) : "??");
+        LOG_INFO("VAE weight type: %s", vae_wtype != GGML_TYPE_COUNT ? ggml_type_name(vae_wtype) : "??");
 
         LOG_DEBUG("ggml tensor size = %d bytes", (int)sizeof(ggml_tensor));
 
tae.hpp (2 changed lines)

@@ -149,7 +149,7 @@ public:
                 if (i == 1) {
                     h = ggml_relu_inplace(ctx, h);
                 } else {
-                    h = ggml_upscale(ctx, h, 2);
+                    h = ggml_upscale(ctx, h, 2, GGML_SCALE_MODE_NEAREST);
                 }
                 continue;
             }
upscaler.cpp

@@ -28,6 +28,10 @@ struct UpscalerGGML {
         LOG_DEBUG("Using Vulkan backend");
         backend = ggml_backend_vk_init(0);
 #endif
+#ifdef SD_USE_OPENCL
+        LOG_DEBUG("Using OpenCL backend");
+        backend = ggml_backend_opencl_init();
+#endif
 #ifdef SD_USE_SYCL
         LOG_DEBUG("Using SYCL backend");
         backend = ggml_backend_sycl_init(0);