2025-12-13 05:48:56 +00:00
5 changed files with 17 additions and 59 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -163,7 +163,7 @@ jobs:
          - build: "avx512"
            defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
          - build: "cuda12"
-            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;80;75"
+            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=60;61;70;75"
          # - build: "rocm5.5"
          #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
          - build: 'vulkan'
@@ -178,9 +178,9 @@ jobs:
      - name: Install cuda-toolkit
        id: cuda-toolkit
        if: ${{ matrix.build == 'cuda12' }}
-        uses: Jimver/cuda-toolkit@v0.2.19
+        uses: Jimver/cuda-toolkit@v0.2.11
        with:
-          cuda: "12.6.2"
+          cuda: "12.2.0"
          method: "network"
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,7 +64,7 @@ endif ()
 if(SD_MUSA)
    message("-- Use MUSA as backend stable-diffusion")
    set(GGML_MUSA ON)
-    add_definitions(-DSD_USE_CUDA)
+    add_definitions(-DSD_USE_CUBLAS)
    if(SD_FAST_SOFTMAX)
        set(GGML_CUDA_FAST_SOFTMAX ON)
    endif()
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -1057,41 +1057,16 @@ int main(int argc, const char* argv[]) {
        }
    }

-    std::string dummy_name, ext, lc_ext;
-    bool is_jpg;
-    size_t last = params.output_path.find_last_of(".");
-    size_t last_path = std::min(params.output_path.find_last_of("/"),
-                                params.output_path.find_last_of("\\"));
-    if (last != std::string::npos // filename has extension
-    && (last_path == std::string::npos || last > last_path)) {
-        dummy_name = params.output_path.substr(0, last);
-        ext = lc_ext = params.output_path.substr(last);
-        std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
-        is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg" || lc_ext == ".jpe";
-    } else {
-        dummy_name = params.output_path;
-        ext = lc_ext = "";
-        is_jpg = false;
-    }
-    // appending ".png" to absent or unknown extension
-    if (!is_jpg && lc_ext != ".png") {
-        dummy_name += ext;
-        ext = ".png";
-    }
+    size_t last            = params.output_path.find_last_of(".");
+    std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
    for (int i = 0; i < params.batch_count; i++) {
        if (results[i].data == NULL) {
            continue;
        }
-        std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
-        if(is_jpg) {
-            stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
-                           results[i].data, 90, get_image_params(params, params.seed + i).c_str());
-            printf("save result JPEG image to '%s'\n", final_image_path.c_str());
-        } else {
-            stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
-                           results[i].data, 0, get_image_params(params, params.seed + i).c_str());
-            printf("save result PNG image to '%s'\n", final_image_path.c_str());
-        }
+        std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
+        stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
+                       results[i].data, 0, get_image_params(params, params.seed + i).c_str());
+        printf("save result image to '%s'\n", final_image_path.c_str());
        free(results[i].data);
        results[i].data = NULL;
    }
--- a/lora.hpp
+++ b/lora.hpp
@@ -615,12 +615,9 @@ struct LoraModel : public GGMLRunner {
                scale_value *= multiplier;

                // flat lora tensors to multiply it
-                int64_t lora_up_rows  = lora_up->ne[ggml_n_dims(lora_up) - 1];
-                lora_up               = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
-                auto lora_down_n_dims = ggml_n_dims(lora_down);
-                // assume n_dims should always be a multiple of 2 (otherwise rank 1 doesn't work)
-                lora_down_n_dims       = (lora_down_n_dims + lora_down_n_dims % 2);
-                int64_t lora_down_rows = lora_down->ne[lora_down_n_dims - 1];
+                int64_t lora_up_rows   = lora_up->ne[ggml_n_dims(lora_up) - 1];
+                lora_up                = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
+                int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
                lora_down              = ggml_reshape_2d(compute_ctx, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);

                // ggml_mul_mat requires tensor b transposed
--- a/thirdparty/stb_image_write.h
+++ b/thirdparty/stb_image_write.h
@@ -1412,7 +1412,7 @@ static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt
   return DU[0];
 }

-static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality, const char* parameters) {
+static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
   // Constants that don't pollute global namespace
   static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
   static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
@@ -1521,20 +1521,6 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in
      s->func(s->context, (void*)YTable, sizeof(YTable));
      stbiw__putc(s, 1);
      s->func(s->context, UVTable, sizeof(UVTable));
-
-      // comment block with parameters of generation
-      if(parameters != NULL) {
-         stbiw__putc(s, 0xFF /* comnent */ );
-         stbiw__putc(s, 0xFE /* marker  */ );
-         size_t param_length = std::min(2 + strlen("parameters") + 1 + strlen(parameters) + 1, (size_t) 0xFFFF);
-         stbiw__putc(s, param_length >> 8); // no need to mask, length < 65536
-         stbiw__putc(s, param_length & 0xFF);
-         s->func(s->context, (void*)"parameters", strlen("parameters") + 1); // std::string is zero-terminated
-         s->func(s->context, (void*)parameters, std::min(param_length, (size_t) 65534) - 2 - strlen("parameters") - 1);
-         if(param_length > 65534) stbiw__putc(s, 0); // always zero-terminate for safety
-         if(param_length & 1) stbiw__putc(s, 0xFF); // pad to even length
-      }
-
      s->func(s->context, (void*)head1, sizeof(head1));
      s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
      s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
@@ -1639,16 +1625,16 @@ STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x,
 {
   stbi__write_context s = { 0 };
   stbi__start_write_callbacks(&s, func, context);
-   return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality, NULL);
+   return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
 }


 #ifndef STBI_WRITE_NO_STDIO
-STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality, const char* parameters)
+STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
 {
   stbi__write_context s = { 0 };
   if (stbi__start_write_file(&s,filename)) {
-      int r = stbi_write_jpg_core(&s, x, y, comp, data, quality, parameters);
+      int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
      stbi__end_write_file(&s);
      return r;
   } else