Compare commits

..

5 Commits

Author        SHA1        Message                                                Date
stduhpf       4c6475f917  feat: show usage on unknown arg (#767)                 2025-09-01 21:38:34 +08:00
SmallAndSoft  f0fa7ddc40  docs: add compile option needed by Ninja (#770)        2025-09-01 21:35:25 +08:00
SmallAndSoft  a7c7905c6d  docs: add missing dash to docs/chroma.md (#771)        2025-09-01 21:34:34 +08:00
Wagner Bruna  eea77cbad9  feat: throttle model loading progress updates (#782)   2025-09-01 21:32:01 +08:00
NekopenDev    0e86d90ee4  chore: add Nvidia 30 series (cuda arch 86) to build    2025-09-01 21:21:34 +08:00

Commit message for eea77cbad9 (#782):
Some terminals have slow display latency, so frequent output during model loading
can actually slow down the process. Also, since tensor loading times can vary a lot,
the progress display now shows the average across past iterations instead of just
the last one.
5 changed files with 35 additions and 14 deletions


@@ -163,7 +163,7 @@ jobs:
           - build: "avx512"
             defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
           - build: "cuda12"
-            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;80;75"
+            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;86;80;75"
           # - build: "rocm5.5"
           #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
           - build: 'vulkan'


@@ -129,7 +129,7 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure
 Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
 ```
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
 cmake --build . --config Release
 ```


@@ -24,7 +24,7 @@ You can download the preconverted gguf weights from [silveroxides/Chroma-GGUF](h
 For example:
 ```
-.\bin\Release\sd.exe -diffusion-model ..\models\chroma-unlocked-v40-q8_0.gguf --vae ..\models\ae.sft --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'" --cfg-scale 4.0 --sampling-method euler -v --chroma-disable-dit-mask
+.\bin\Release\sd.exe --diffusion-model ..\models\chroma-unlocked-v40-q8_0.gguf --vae ..\models\ae.sft --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'" --cfg-scale 4.0 --sampling-method euler -v --chroma-disable-dit-mask
 ```
 ![](../assets/flux/chroma_v40.png)


@@ -297,10 +297,12 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
     bool invalid_arg = false;
     std::string arg;
     for (int i = 1; i < argc; i++) {
+        bool found_arg = false;
         arg = argv[i];
         for (auto& option : options.string_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (++i >= argc) {
                     invalid_arg = true;
                     break;
@@ -314,6 +316,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.int_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (++i >= argc) {
                     invalid_arg = true;
                     break;
@@ -327,6 +330,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.float_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (++i >= argc) {
                     invalid_arg = true;
                     break;
@@ -340,6 +344,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.bool_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (option.keep_true) {
                     *option.target = true;
                 } else {
@@ -353,6 +358,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.manual_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 int ret = option.cb(argc, argv, i);
                 if (ret < 0) {
                     invalid_arg = true;
@@ -364,6 +370,10 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         if (invalid_arg) {
             break;
         }
+        if (!found_arg) {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            return false;
+        }
     }
     if (invalid_arg) {
         fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
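
The pattern in #767 is simple: each option table sets a shared found_arg flag when it matches the current token, and the outer loop rejects any token that no table claimed. Below is a minimal, self-contained sketch of the same flag-and-reject pattern; the BoolOption table and parse_bool_options here are hypothetical stand-ins, not the project's actual ArgOptions types.

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical single-table stand-in; the real ArgOptions keeps separate
// tables for string/int/float/bool/manual options.
struct BoolOption {
    std::string short_name;
    std::string long_name;
    bool* target;
};

bool parse_bool_options(int argc, const char** argv, std::vector<BoolOption>& options) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        bool found_arg  = false;  // set by whichever table entry matches this token
        for (auto& option : options) {
            if ((option.short_name.size() > 0 && arg == option.short_name) ||
                (option.long_name.size() > 0 && arg == option.long_name)) {
                found_arg      = true;
                *option.target = true;
                break;
            }
        }
        if (!found_arg) {  // no table claimed the token: report it instead of silently ignoring it
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            return false;
        }
    }
    return true;
}

int main() {
    bool verbose = false;
    std::vector<BoolOption> options = {{"-v", "--verbose", &verbose}};

    const char* fake_argv[] = {"sd", "--verbose", "--typo"};
    bool ok = parse_bool_options(3, fake_argv, options);  // fails on "--typo"
    printf("ok=%d verbose=%d\n", ok, verbose);
    return ok ? 0 : 1;
}
```

Compiled as-is, this prints the unknown-argument error for "--typo" and returns nonzero; per the commit title, sd.exe additionally shows its usage text when parsing fails.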


@@ -1942,8 +1942,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             return true;
         };
         int tensor_count = 0;
-        int64_t t1 = ggml_time_ms();
-        bool partial = false;
+        int64_t t0 = ggml_time_ms();
+        int64_t t1 = t0;
+        bool partial = true;
+        int tensor_max = (int)processed_tensor_storages.size();
+        pretty_progress(0, tensor_max, 0.0f);
         for (auto& tensor_storage : processed_tensor_storages) {
             if (tensor_storage.file_index != file_index) {
                 ++tensor_count;
@@ -2046,21 +2049,29 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                     ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
                 }
             }
-            size_t tensor_max = processed_tensor_storages.size();
-            int64_t t2 = ggml_time_ms();
-            pretty_progress(++tensor_count, tensor_max, (t2 - t1) / 1000.0f);
-            t1 = t2;
-            partial = tensor_count != tensor_max;
+            ++tensor_count;
+            int64_t t2 = ggml_time_ms();
+            if ((t2 - t1) >= 200) {
+                t1 = t2;
+                pretty_progress(tensor_count, tensor_max, (t1 - t0) / (1000.0f * tensor_count));
+                partial = tensor_count != tensor_max;
+            }
         }
+        if (partial) {
+            if (tensor_count >= 1) {
+                t1 = ggml_time_ms();
+                pretty_progress(tensor_count, tensor_max, (t1 - t0) / (1000.0f * tensor_count));
+            }
+            if (tensor_count < tensor_max) {
+                printf("\n");
+            }
+        }
         if (zip != NULL) {
             zip_close(zip);
         }
-        if (partial) {
-            printf("\n");
-        }
         if (!success) {
             break;
         }
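
The loader diff implements both points from the eea77cbad9 commit message: the progress line is redrawn at most once every 200 ms, and the time shown is the running average (t2 - t0) / tensor_count over all tensors loaded so far rather than the duration of the last one, with a catch-up update after the loop so the display ends in a consistent state. Below is a stand-alone sketch of that throttle-plus-average pattern, using a stand-in print_progress and simulated work in place of pretty_progress and real tensor loading.

```cpp
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

// Stand-in for the project's pretty_progress(): one carriage-return-updated line.
static void print_progress(int count, int total, float avg_seconds_per_item) {
    printf("\rloading %d/%d (%.3fs per item, averaged)", count, total, avg_seconds_per_item);
    fflush(stdout);
}

static int64_t time_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    const int total = 50;        // pretend we have 50 tensors to load
    int64_t t0      = time_ms(); // start of loading
    int64_t t1      = t0;        // time of the last printed update
    bool partial    = true;      // true while the last printed line is not the final one
    print_progress(0, total, 0.0f);

    for (int count = 1; count <= total; count++) {
        // Simulated per-item work with varying duration, like tensors of different sizes.
        std::this_thread::sleep_for(std::chrono::milliseconds(10 + (count % 7) * 15));

        int64_t t2 = time_ms();
        if ((t2 - t1) >= 200) {  // throttle: redraw at most once every 200 ms
            t1 = t2;
            // Average over everything loaded so far, not just the last item.
            print_progress(count, total, (t1 - t0) / (1000.0f * count));
            partial = count != total;
        }
    }

    if (partial) {  // catch-up update in case the last redraw was throttled away
        t1 = time_ms();
        print_progress(total, total, (t1 - t0) / (1000.0f * total));
    }
    printf("\n");
    return 0;
}
```

The 200 ms threshold and the elapsed-over-count average mirror the diff above; the item count and sleep durations are made up for the demonstration.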