Compare commits

..

5 Commits

Author        SHA1        Message                                                Date
stduhpf       4c6475f917  feat: show usage on unknown arg (#767)                 2025-09-01 21:38:34 +08:00
SmallAndSoft  f0fa7ddc40  docs: add compile option needed by Ninja (#770)        2025-09-01 21:35:25 +08:00
SmallAndSoft  a7c7905c6d  docs: add missing dash to docs/chroma.md (#771)        2025-09-01 21:34:34 +08:00
Wagner Bruna  eea77cbad9  feat: throttle model loading progress updates (#782)   2025-09-01 21:32:01 +08:00
NekopenDev    0e86d90ee4  chore: add Nvidia 30 series (cuda arch 86) to build    2025-09-01 21:21:34 +08:00

Commit message for eea77cbad9 (#782):
Some terminals have slow display latency, so frequent output during model loading
can actually slow down the process. Also, since tensor loading times can vary a lot,
the progress display now shows the average across past iterations instead of just
the last one.
5 changed files with 35 additions and 14 deletions


@@ -163,7 +163,7 @@ jobs:
           - build: "avx512"
             defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
           - build: "cuda12"
-            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;80;75"
+            defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;86;80;75"
           # - build: "rocm5.5"
           #   defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
           - build: 'vulkan'


@@ -129,7 +129,7 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure
 Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
 ```
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
 cmake --build . --config Release
 ```


@@ -24,7 +24,7 @@ You can download the preconverted gguf weights from [silveroxides/Chroma-GGUF](h
 For example:
 ```
-.\bin\Release\sd.exe -diffusion-model ..\models\chroma-unlocked-v40-q8_0.gguf --vae ..\models\ae.sft --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'" --cfg-scale 4.0 --sampling-method euler -v --chroma-disable-dit-mask
+.\bin\Release\sd.exe --diffusion-model ..\models\chroma-unlocked-v40-q8_0.gguf --vae ..\models\ae.sft --t5xxl ..\models\t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'chroma.cpp'" --cfg-scale 4.0 --sampling-method euler -v --chroma-disable-dit-mask
 ```
 ![](../assets/flux/chroma_v40.png)


@@ -297,10 +297,12 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
     bool invalid_arg = false;
     std::string arg;
     for (int i = 1; i < argc; i++) {
+        bool found_arg = false;
         arg = argv[i];
         for (auto& option : options.string_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (++i >= argc) {
                     invalid_arg = true;
                     break;
@@ -314,6 +316,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.int_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (++i >= argc) {
                     invalid_arg = true;
                     break;
@@ -327,6 +330,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.float_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (++i >= argc) {
                     invalid_arg = true;
                     break;
@@ -340,6 +344,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.bool_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 if (option.keep_true) {
                     *option.target = true;
                 } else {
@@ -353,6 +358,7 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         for (auto& option : options.manual_options) {
             if ((option.short_name.size() > 0 && arg == option.short_name) || (option.long_name.size() > 0 && arg == option.long_name)) {
+                found_arg = true;
                 int ret = option.cb(argc, argv, i);
                 if (ret < 0) {
                     invalid_arg = true;
@@ -364,6 +370,10 @@ bool parse_options(int argc, const char** argv, ArgOptions& options) {
         if (invalid_arg) {
             break;
         }
+        if (!found_arg) {
+            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
+            return false;
+        }
     }
     if (invalid_arg) {
         fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
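
The pattern in #767 is simple: each option table sets a shared found_arg flag when it matches the current token, and the outer loop rejects any token that no table claimed. Below is a minimal, self-contained sketch of the same flag-and-reject pattern; the BoolOption table and parse_bool_options here are hypothetical stand-ins, not the project's actual ArgOptions types.

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical single-table stand-in; the real ArgOptions keeps separate
// tables for string/int/float/bool/manual options.
struct BoolOption {
    std::string short_name;
    std::string long_name;
    bool* target;
};

bool parse_bool_options(int argc, const char** argv, std::vector<BoolOption>& options) {
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];
        bool found_arg  = false;  // set by whichever table entry matches this token
        for (auto& option : options) {
            if ((option.short_name.size() > 0 && arg == option.short_name) ||
                (option.long_name.size() > 0 && arg == option.long_name)) {
                found_arg      = true;
                *option.target = true;
                break;
            }
        }
        if (!found_arg) {  // no table claimed the token: report it instead of silently ignoring it
            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
            return false;
        }
    }
    return true;
}

int main() {
    bool verbose = false;
    std::vector<BoolOption> options = {{"-v", "--verbose", &verbose}};

    const char* fake_argv[] = {"sd", "--verbose", "--typo"};
    bool ok = parse_bool_options(3, fake_argv, options);  // fails on "--typo"
    printf("ok=%d verbose=%d\n", ok, verbose);
    return ok ? 0 : 1;
}
```

Compiled as-is, this prints the unknown-argument error for "--typo" and returns nonzero; per the commit title, sd.exe additionally shows its usage text when parsing fails.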


@@ -1942,8 +1942,11 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
             return true;
         };
         int tensor_count = 0;
-        int64_t t1 = ggml_time_ms();
-        bool partial = false;
+        int64_t t0 = ggml_time_ms();
+        int64_t t1 = t0;
+        bool partial = true;
+        int tensor_max = (int)processed_tensor_storages.size();
+        pretty_progress(0, tensor_max, 0.0f);
         for (auto& tensor_storage : processed_tensor_storages) {
             if (tensor_storage.file_index != file_index) {
                 ++tensor_count;
@@ -2046,21 +2049,29 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend
                     ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
                 }
             }
-            size_t tensor_max = processed_tensor_storages.size();
-            int64_t t2 = ggml_time_ms();
-            pretty_progress(++tensor_count, tensor_max, (t2 - t1) / 1000.0f);
-            t1 = t2;
-            partial = tensor_count != tensor_max;
+            ++tensor_count;
+            int64_t t2 = ggml_time_ms();
+            if ((t2 - t1) >= 200) {
+                t1 = t2;
+                pretty_progress(tensor_count, tensor_max, (t1 - t0) / (1000.0f * tensor_count));
+                partial = tensor_count != tensor_max;
+            }
         }
+        if (partial) {
+            if (tensor_count >= 1) {
+                t1 = ggml_time_ms();
+                pretty_progress(tensor_count, tensor_max, (t1 - t0) / (1000.0f * tensor_count));
+            }
+            if (tensor_count < tensor_max) {
+                printf("\n");
+            }
+        }
         if (zip != NULL) {
             zip_close(zip);
         }
-        if (partial) {
-            printf("\n");
-        }
         if (!success) {
             break;
         }
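
The loader diff implements both points from the eea77cbad9 commit message: the progress line is redrawn at most once every 200 ms, and the time shown is the running average (t2 - t0) / tensor_count over all tensors loaded so far rather than the duration of the last one, with a catch-up update after the loop so the display ends in a consistent state. Below is a stand-alone sketch of that throttle-plus-average pattern, using a stand-in print_progress and simulated work in place of pretty_progress and real tensor loading.

```cpp
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

// Stand-in for the project's pretty_progress(): one carriage-return-updated line.
static void print_progress(int count, int total, float avg_seconds_per_item) {
    printf("\rloading %d/%d (%.3fs per item, averaged)", count, total, avg_seconds_per_item);
    fflush(stdout);
}

static int64_t time_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    const int total = 50;        // pretend we have 50 tensors to load
    int64_t t0      = time_ms(); // start of loading
    int64_t t1      = t0;        // time of the last printed update
    bool partial    = true;      // true while the last printed line is not the final one
    print_progress(0, total, 0.0f);

    for (int count = 1; count <= total; count++) {
        // Simulated per-item work with varying duration, like tensors of different sizes.
        std::this_thread::sleep_for(std::chrono::milliseconds(10 + (count % 7) * 15));

        int64_t t2 = time_ms();
        if ((t2 - t1) >= 200) {  // throttle: redraw at most once every 200 ms
            t1 = t2;
            // Average over everything loaded so far, not just the last item.
            print_progress(count, total, (t1 - t0) / (1000.0f * count));
            partial = count != total;
        }
    }

    if (partial) {  // catch-up update in case the last redraw was throttled away
        t1 = time_ms();
        print_progress(total, total, (t1 - t0) / (1000.0f * total));
    }
    printf("\n");
    return 0;
}
```

The 200 ms threshold and the elapsed-over-count average mirror the diff above; the item count and sleep durations are made up for the demonstration.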