Compare commits

..

No commits in common. "d46ed5e184b97c2018dc2e8105925bdb8775e02c" and "5eb15ef4d022bef4a391de4f5f6556e81fbb5024" have entirely different histories.

5 changed files with 17 additions and 59 deletions

View File

@ -163,7 +163,7 @@ jobs:
- build: "avx512"
defines: "-DGGML_NATIVE=OFF -DGGML_AVX512=ON -DGGML_AVX=ON -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON"
- build: "cuda12"
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90;89;80;75"
defines: "-DSD_CUDA=ON -DSD_BUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=60;61;70;75"
# - build: "rocm5.5"
# defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
- build: 'vulkan'
@ -178,9 +178,9 @@ jobs:
- name: Install cuda-toolkit
id: cuda-toolkit
if: ${{ matrix.build == 'cuda12' }}
uses: Jimver/cuda-toolkit@v0.2.19
uses: Jimver/cuda-toolkit@v0.2.11
with:
cuda: "12.6.2"
cuda: "12.2.0"
method: "network"
sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'

View File

@ -64,7 +64,7 @@ endif ()
if(SD_MUSA)
message("-- Use MUSA as backend stable-diffusion")
set(GGML_MUSA ON)
add_definitions(-DSD_USE_CUDA)
add_definitions(-DSD_USE_CUBLAS)
if(SD_FAST_SOFTMAX)
set(GGML_CUDA_FAST_SOFTMAX ON)
endif()

View File

@ -1057,41 +1057,16 @@ int main(int argc, const char* argv[]) {
}
}
std::string dummy_name, ext, lc_ext;
bool is_jpg;
size_t last = params.output_path.find_last_of(".");
size_t last_path = std::min(params.output_path.find_last_of("/"),
params.output_path.find_last_of("\\"));
if (last != std::string::npos // filename has extension
&& (last_path == std::string::npos || last > last_path)) {
dummy_name = params.output_path.substr(0, last);
ext = lc_ext = params.output_path.substr(last);
std::transform(ext.begin(), ext.end(), lc_ext.begin(), ::tolower);
is_jpg = lc_ext == ".jpg" || lc_ext == ".jpeg" || lc_ext == ".jpe";
} else {
dummy_name = params.output_path;
ext = lc_ext = "";
is_jpg = false;
}
// appending ".png" to absent or unknown extension
if (!is_jpg && lc_ext != ".png") {
dummy_name += ext;
ext = ".png";
}
size_t last = params.output_path.find_last_of(".");
std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
for (int i = 0; i < params.batch_count; i++) {
if (results[i].data == NULL) {
continue;
}
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ext : dummy_name + ext;
if(is_jpg) {
stbi_write_jpg(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
results[i].data, 90, get_image_params(params, params.seed + i).c_str());
printf("save result JPEG image to '%s'\n", final_image_path.c_str());
} else {
stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
results[i].data, 0, get_image_params(params, params.seed + i).c_str());
printf("save result PNG image to '%s'\n", final_image_path.c_str());
}
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
results[i].data, 0, get_image_params(params, params.seed + i).c_str());
printf("save result image to '%s'\n", final_image_path.c_str());
free(results[i].data);
results[i].data = NULL;
}

View File

@ -615,12 +615,9 @@ struct LoraModel : public GGMLRunner {
scale_value *= multiplier;
// flat lora tensors to multiply it
int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
lora_up = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
auto lora_down_n_dims = ggml_n_dims(lora_down);
// assume n_dims should always be a multiple of 2 (otherwise rank 1 doesn't work)
lora_down_n_dims = (lora_down_n_dims + lora_down_n_dims % 2);
int64_t lora_down_rows = lora_down->ne[lora_down_n_dims - 1];
int64_t lora_up_rows = lora_up->ne[ggml_n_dims(lora_up) - 1];
lora_up = ggml_reshape_2d(compute_ctx, lora_up, ggml_nelements(lora_up) / lora_up_rows, lora_up_rows);
int64_t lora_down_rows = lora_down->ne[ggml_n_dims(lora_down) - 1];
lora_down = ggml_reshape_2d(compute_ctx, lora_down, ggml_nelements(lora_down) / lora_down_rows, lora_down_rows);
// ggml_mul_mat requires tensor b transposed

View File

@ -1412,7 +1412,7 @@ static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt
return DU[0];
}
static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality, const char* parameters) {
static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
// Constants that don't pollute global namespace
static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
@ -1521,20 +1521,6 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, in
s->func(s->context, (void*)YTable, sizeof(YTable));
stbiw__putc(s, 1);
s->func(s->context, UVTable, sizeof(UVTable));
// comment block with parameters of generation
if(parameters != NULL) {
stbiw__putc(s, 0xFF /* comment */ );
stbiw__putc(s, 0xFE /* marker */ );
size_t param_length = std::min(2 + strlen("parameters") + 1 + strlen(parameters) + 1, (size_t) 0xFFFF);
stbiw__putc(s, param_length >> 8); // no need to mask, length < 65536
stbiw__putc(s, param_length & 0xFF);
s->func(s->context, (void*)"parameters", strlen("parameters") + 1); // std::string is zero-terminated
s->func(s->context, (void*)parameters, std::min(param_length, (size_t) 65534) - 2 - strlen("parameters") - 1);
if(param_length > 65534) stbiw__putc(s, 0); // always zero-terminate for safety
if(param_length & 1) stbiw__putc(s, 0xFF); // pad to even length
}
s->func(s->context, (void*)head1, sizeof(head1));
s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
@ -1639,16 +1625,16 @@ STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x,
{
stbi__write_context s = { 0 };
stbi__start_write_callbacks(&s, func, context);
return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality, NULL);
return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
}
#ifndef STBI_WRITE_NO_STDIO
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality, const char* parameters)
STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
{
stbi__write_context s = { 0 };
if (stbi__start_write_file(&s,filename)) {
int r = stbi_write_jpg_core(&s, x, y, comp, data, quality, parameters);
int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
stbi__end_write_file(&s);
return r;
} else