fix: clarify lora quant support and small fixes (#792)

This commit is contained in:
Markus Hartung 2025-09-08 16:39:25 +02:00 committed by GitHub
parent c648001030
commit abb115cd02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 49 additions and 4 deletions

3
.gitignore vendored
View File

@@ -4,10 +4,11 @@ test/
.cache/
*.swp
.vscode/
.idea/
*.bat
*.bin
*.exe
*.gguf
output*.png
models*
*.log
*.log

View File

@@ -137,7 +137,9 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure
Windows users: refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
```
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
export GFX_NAME=$(rocminfo | grep -m 1 -E "gfx[^0]{1}" | sed -e 's/ *Name: *//' | awk '{$1=$1; print}' || echo "rocminfo missing")
echo $GFX_NAME
cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
cmake --build . --config Release
```

View File

@@ -10,4 +10,30 @@ Here's a simple example:
./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
```
`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model.
`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model.
# Support matrix
> CUDA `get_rows` support is defined here:
> [ggml-org/ggml/src/ggml-cuda/getrows.cu#L156](https://github.com/ggml-org/ggml/blob/7dee1d6a1e7611f238d09be96738388da97c88ed/src/ggml-cuda/getrows.cu#L156)
> Currently only the basic types + Q4/Q5/Q8 are implemented. K-quants are **not** supported.
NOTE: The other backends may have different support.
| Quant / Type | CUDA |
|--------------|------|
| F32 | ✔️ |
| F16 | ✔️ |
| BF16 | ✔️ |
| I32 | ✔️ |
| Q4_0 | ✔️ |
| Q4_1 | ✔️ |
| Q5_0 | ✔️ |
| Q5_1 | ✔️ |
| Q8_0 | ✔️ |
| Q2_K | ❌ |
| Q3_K | ❌ |
| Q4_K | ❌ |
| Q5_K | ❌ |
| Q6_K | ❌ |
| Q8_K | ❌ |

View File

@@ -1,6 +1,7 @@
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <filesystem>
#include <functional>
#include <iostream>
#include <map>
@@ -1283,6 +1284,21 @@ int main(int argc, const char* argv[]) {
}
}
// create directory if not exists
{
namespace fs = std::filesystem;
const fs::path out_path = params.output_path;
if (const fs::path out_dir = out_path.parent_path(); !out_dir.empty()) {
std::error_code ec;
fs::create_directories(out_dir, ec); // OK if already exists
if (ec) {
fprintf(stderr, "failed to create directory '%s': %s\n",
out_dir.string().c_str(), ec.message().c_str());
return 1;
}
}
}
std::string base_path;
std::string file_ext;
std::string file_ext_lower;

View File

@@ -330,7 +330,7 @@ public:
if (sd_version_is_dit(version)) {
use_t5xxl = true;
}
if (!ggml_backend_is_cpu(backend) && use_t5xxl) {
if (!clip_on_cpu && !ggml_backend_is_cpu(backend) && use_t5xxl) {
LOG_WARN(
"!!!It appears that you are using the T5 model. Some backends may encounter issues with it."
"If you notice that the generated images are completely black,"