mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2025-12-13 05:48:56 +00:00
Compare commits
3 Commits
036ba9e6d8
...
ce1bcc74a6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce1bcc74a6 | ||
|
|
760cfaa618 | ||
|
|
6d16f6853e |
@ -190,12 +190,13 @@ arguments:
|
|||||||
--rng {std_default, cuda} RNG (default: cuda)
|
--rng {std_default, cuda} RNG (default: cuda)
|
||||||
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
|
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
|
||||||
-b, --batch-count COUNT number of images to generate.
|
-b, --batch-count COUNT number of images to generate.
|
||||||
--schedule {discrete, karras} Denoiser sigma schedule (default: discrete)
|
--schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)
|
||||||
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
|
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
|
||||||
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
|
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
|
||||||
--vae-tiling process vae in tiles to reduce memory usage
|
--vae-tiling process vae in tiles to reduce memory usage
|
||||||
--control-net-cpu keep controlnet in cpu (for low vram)
|
--control-net-cpu keep controlnet in cpu (for low vram)
|
||||||
--canny apply canny preprocessor (edge detection)
|
--canny apply canny preprocessor (edge detection)
|
||||||
|
--color colors the logging tags according to level
|
||||||
-v, --verbose print extra info
|
-v, --verbose print extra info
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
139
denoiser.hpp
139
denoiser.hpp
@ -13,6 +13,7 @@ struct SigmaSchedule {
|
|||||||
float alphas_cumprod[TIMESTEPS];
|
float alphas_cumprod[TIMESTEPS];
|
||||||
float sigmas[TIMESTEPS];
|
float sigmas[TIMESTEPS];
|
||||||
float log_sigmas[TIMESTEPS];
|
float log_sigmas[TIMESTEPS];
|
||||||
|
int version = 0;
|
||||||
|
|
||||||
virtual std::vector<float> get_sigmas(uint32_t n) = 0;
|
virtual std::vector<float> get_sigmas(uint32_t n) = 0;
|
||||||
|
|
||||||
@ -75,6 +76,144 @@ struct DiscreteSchedule : SigmaSchedule {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html
|
||||||
|
*/
|
||||||
|
struct AYSSchedule : SigmaSchedule {
|
||||||
|
/* interp and linear_interp adapted from dpilger26's NumCpp library:
|
||||||
|
* https://github.com/dpilger26/NumCpp/tree/5e40aab74d14e257d65d3dc385c9ff9e2120c60e */
|
||||||
|
constexpr double interp(double left, double right, double perc) noexcept {
|
||||||
|
return (left * (1. - perc)) + (right * perc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This will make the assumption that the reference x and y values are
|
||||||
|
* already sorted in ascending order because they are being generated as
|
||||||
|
* such in the calling function */
|
||||||
|
std::vector<double> linear_interp(std::vector<float> new_x,
|
||||||
|
const std::vector<float> ref_x,
|
||||||
|
const std::vector<float> ref_y) {
|
||||||
|
const size_t len_x = new_x.size();
|
||||||
|
size_t i = 0;
|
||||||
|
size_t j = 0;
|
||||||
|
std::vector<double> new_y(len_x);
|
||||||
|
|
||||||
|
if (ref_x.size() != ref_y.size()) {
|
||||||
|
LOG_ERROR("Linear Interoplation Failed: length mismatch");
|
||||||
|
return new_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* serves as the bounds checking for the below while loop */
|
||||||
|
if ((new_x[0] < ref_x[0]) || (new_x[new_x.size() - 1] > ref_x[ref_x.size() - 1])) {
|
||||||
|
LOG_ERROR("Linear Interpolation Failed: bad bounds");
|
||||||
|
return new_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (i < len_x) {
|
||||||
|
if ((ref_x[j] > new_x[i]) || (new_x[i] > ref_x[j + 1])) {
|
||||||
|
j++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const double perc = static_cast<double>(new_x[i] - ref_x[j]) / static_cast<double>(ref_x[j + 1] - ref_x[j]);
|
||||||
|
|
||||||
|
new_y[i] = interp(ref_y[j], ref_y[j + 1], perc);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> linear_space(const float start, const float end, const size_t num_points) {
|
||||||
|
std::vector<float> result(num_points);
|
||||||
|
const float inc = (end - start) / (static_cast<float>(num_points - 1));
|
||||||
|
|
||||||
|
if (num_points > 0) {
|
||||||
|
result[0] = start;
|
||||||
|
|
||||||
|
for (size_t i = 1; i < num_points; i++) {
|
||||||
|
result[i] = result[i - 1] + inc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> log_linear_interpolation(std::vector<float> sigma_in,
|
||||||
|
const size_t new_len) {
|
||||||
|
const size_t s_len = sigma_in.size();
|
||||||
|
std::vector<float> x_vals = linear_space(0.f, 1.f, s_len);
|
||||||
|
std::vector<float> y_vals(s_len);
|
||||||
|
|
||||||
|
/* Reverses the input array to be ascending instead of descending,
|
||||||
|
* also hits it with a log, it is log-linear interpolation after all */
|
||||||
|
for (size_t i = 0; i < s_len; i++) {
|
||||||
|
y_vals[i] = std::log(sigma_in[s_len - i - 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> new_x_vals = linear_space(0.f, 1.f, new_len);
|
||||||
|
std::vector<double> new_y_vals = linear_interp(new_x_vals, x_vals, y_vals);
|
||||||
|
std::vector<float> results(new_len);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < new_len; i++) {
|
||||||
|
results[i] = static_cast<float>(std::exp(new_y_vals[new_len - i - 1]));
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> get_sigmas(uint32_t len) {
|
||||||
|
const std::vector<float> noise_levels[] = {
|
||||||
|
/* SD1.5 */
|
||||||
|
{14.6146412293f, 6.4745760956f, 3.8636745985f, 2.6946151520f,
|
||||||
|
1.8841921177f, 1.3943805092f, 0.9642583904f, 0.6523686016f,
|
||||||
|
0.3977456272f, 0.1515232662f, 0.0291671582f},
|
||||||
|
/* SDXL */
|
||||||
|
{14.6146412293f, 6.3184485287f, 3.7681790315f, 2.1811480769f,
|
||||||
|
1.3405244945f, 0.8620721141f, 0.5550693289f, 0.3798540708f,
|
||||||
|
0.2332364134f, 0.1114188177f, 0.0291671582f},
|
||||||
|
/* SVD */
|
||||||
|
{700.00f, 54.5f, 15.886f, 7.977f, 4.248f, 1.789f, 0.981f, 0.403f,
|
||||||
|
0.173f, 0.034f, 0.002f},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<float> inputs;
|
||||||
|
std::vector<float> results(len + 1);
|
||||||
|
|
||||||
|
switch (version) {
|
||||||
|
case VERSION_2_x: /* fallthrough */
|
||||||
|
LOG_WARN("AYS not designed for SD2.X models");
|
||||||
|
case VERSION_1_x:
|
||||||
|
LOG_INFO("AYS using SD1.5 noise levels");
|
||||||
|
inputs = noise_levels[0];
|
||||||
|
break;
|
||||||
|
case VERSION_XL:
|
||||||
|
LOG_INFO("AYS using SDXL noise levels");
|
||||||
|
inputs = noise_levels[1];
|
||||||
|
break;
|
||||||
|
case VERSION_SVD:
|
||||||
|
LOG_INFO("AYS using SVD noise levels");
|
||||||
|
inputs = noise_levels[2];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_ERROR("Version not compatable with AYS scheduler");
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Stretches those pre-calculated reference levels out to the desired
|
||||||
|
* size using log-linear interpolation */
|
||||||
|
if ((len + 1) != inputs.size()) {
|
||||||
|
results = log_linear_interpolation(inputs, len + 1);
|
||||||
|
} else {
|
||||||
|
results = inputs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not sure if this is strictly neccessary */
|
||||||
|
results[len] = 0.0f;
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct KarrasSchedule : SigmaSchedule {
|
struct KarrasSchedule : SigmaSchedule {
|
||||||
std::vector<float> get_sigmas(uint32_t n) {
|
std::vector<float> get_sigmas(uint32_t n) {
|
||||||
// These *COULD* be function arguments here,
|
// These *COULD* be function arguments here,
|
||||||
|
|||||||
@ -43,6 +43,7 @@ const char* schedule_str[] = {
|
|||||||
"default",
|
"default",
|
||||||
"discrete",
|
"discrete",
|
||||||
"karras",
|
"karras",
|
||||||
|
"ays",
|
||||||
};
|
};
|
||||||
|
|
||||||
const char* modes_str[] = {
|
const char* modes_str[] = {
|
||||||
@ -190,12 +191,13 @@ void print_usage(int argc, const char* argv[]) {
|
|||||||
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
|
||||||
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
|
||||||
printf(" -b, --batch-count COUNT number of images to generate.\n");
|
printf(" -b, --batch-count COUNT number of images to generate.\n");
|
||||||
printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n");
|
printf(" --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)\n");
|
||||||
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
|
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
|
||||||
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
|
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
|
||||||
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
|
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
|
||||||
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
|
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
|
||||||
printf(" --canny apply canny preprocessor (edge detection)\n");
|
printf(" --canny apply canny preprocessor (edge detection)\n");
|
||||||
|
printf(" --color Colors the logging tags according to level\n");
|
||||||
printf(" -v, --verbose print extra info\n");
|
printf(" -v, --verbose print extra info\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -454,7 +454,7 @@ __STATIC_INLINE__ void sd_tiling(ggml_tensor* input, ggml_tensor* output, const
|
|||||||
ggml_tensor* input_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, tile_size, tile_size, input->ne[2], 1);
|
ggml_tensor* input_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, tile_size, tile_size, input->ne[2], 1);
|
||||||
ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, tile_size * scale, tile_size * scale, output->ne[2], 1);
|
ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, tile_size * scale, tile_size * scale, output->ne[2], 1);
|
||||||
on_processing(input_tile, NULL, true);
|
on_processing(input_tile, NULL, true);
|
||||||
int num_tiles = (input_width * input_height) / (non_tile_overlap * non_tile_overlap);
|
int num_tiles = ceil((float)input_width / non_tile_overlap) * ceil((float)input_height / non_tile_overlap);
|
||||||
LOG_INFO("processing %i tiles", num_tiles);
|
LOG_INFO("processing %i tiles", num_tiles);
|
||||||
pretty_progress(1, num_tiles, 0.0f);
|
pretty_progress(1, num_tiles, 0.0f);
|
||||||
int tile_count = 1;
|
int tile_count = 1;
|
||||||
|
|||||||
@ -888,6 +888,12 @@ bool ModelLoader::init_from_safetensors_file(const std::string& file_path, const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml/src/ggml.c:2745
|
||||||
|
if (n_dims < 1 || n_dims > GGML_MAX_DIMS) {
|
||||||
|
LOG_ERROR("skip tensor '%s' with n_dims %d", name.c_str(), n_dims);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
TensorStorage tensor_storage(prefix + name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
|
TensorStorage tensor_storage(prefix + name, type, ne, n_dims, file_index, ST_HEADER_SIZE_LEN + header_size_ + begin);
|
||||||
tensor_storage.reverse_ne();
|
tensor_storage.reverse_ne();
|
||||||
|
|
||||||
|
|||||||
@ -450,6 +450,11 @@ public:
|
|||||||
LOG_INFO("running with Karras schedule");
|
LOG_INFO("running with Karras schedule");
|
||||||
denoiser->schedule = std::make_shared<KarrasSchedule>();
|
denoiser->schedule = std::make_shared<KarrasSchedule>();
|
||||||
break;
|
break;
|
||||||
|
case AYS:
|
||||||
|
LOG_INFO("Running with Align-Your-Steps schedule");
|
||||||
|
denoiser->schedule = std::make_shared<AYSSchedule>();
|
||||||
|
denoiser->schedule->version = version;
|
||||||
|
break;
|
||||||
case DEFAULT:
|
case DEFAULT:
|
||||||
// Don't touch anything.
|
// Don't touch anything.
|
||||||
break;
|
break;
|
||||||
|
|||||||
@ -49,6 +49,7 @@ enum schedule_t {
|
|||||||
DEFAULT,
|
DEFAULT,
|
||||||
DISCRETE,
|
DISCRETE,
|
||||||
KARRAS,
|
KARRAS,
|
||||||
|
AYS,
|
||||||
N_SCHEDULES
|
N_SCHEDULES
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user