mirror of
https://github.com/leejet/stable-diffusion.cpp.git
synced 2026-04-03 07:08:53 +00:00
279 lines
11 KiB
C++
279 lines
11 KiB
C++
#ifndef __PREPROCESSING_HPP__
|
|
#define __PREPROCESSING_HPP__
|
|
|
|
#include <cmath>
|
|
#include <limits>
|
|
|
|
#include "ggml_extend.hpp"
|
|
|
|
#define M_PI_ 3.14159265358979323846f
|
|
|
|
static inline int64_t preprocessing_offset_4d(const sd::Tensor<float>& tensor, int64_t i0, int64_t i1 = 0, int64_t i2 = 0, int64_t i3 = 0) {
|
|
const auto& shape = tensor.shape();
|
|
int64_t n0 = shape.size() > 0 ? shape[0] : 1;
|
|
int64_t n1 = shape.size() > 1 ? shape[1] : 1;
|
|
int64_t n2 = shape.size() > 2 ? shape[2] : 1;
|
|
return ((i3 * n2 + i2) * n1 + i1) * n0 + i0;
|
|
}
|
|
|
|
static inline float preprocessing_get_4d(const sd::Tensor<float>& tensor, int64_t i0, int64_t i1 = 0, int64_t i2 = 0, int64_t i3 = 0) {
|
|
return tensor.values()[static_cast<size_t>(preprocessing_offset_4d(tensor, i0, i1, i2, i3))];
|
|
}
|
|
|
|
static inline void preprocessing_set_4d(sd::Tensor<float>& tensor, float value, int64_t i0, int64_t i1 = 0, int64_t i2 = 0, int64_t i3 = 0) {
|
|
tensor.values()[static_cast<size_t>(preprocessing_offset_4d(tensor, i0, i1, i2, i3))] = value;
|
|
}
|
|
|
|
static inline sd::Tensor<float> sd_image_to_preprocessing_tensor(sd_image_t image) {
|
|
sd::Tensor<float> tensor({static_cast<int64_t>(image.width), static_cast<int64_t>(image.height), static_cast<int64_t>(image.channel), 1});
|
|
for (uint32_t y = 0; y < image.height; ++y) {
|
|
for (uint32_t x = 0; x < image.width; ++x) {
|
|
for (uint32_t c = 0; c < image.channel; ++c) {
|
|
preprocessing_set_4d(tensor, sd_image_get_f32(image, x, y, c), x, y, c, 0);
|
|
}
|
|
}
|
|
}
|
|
return tensor;
|
|
}
|
|
|
|
static inline void preprocessing_tensor_to_sd_image(const sd::Tensor<float>& tensor, uint8_t* image_data) {
|
|
GGML_ASSERT(tensor.dim() == 4);
|
|
GGML_ASSERT(tensor.shape()[3] == 1);
|
|
GGML_ASSERT(image_data != nullptr);
|
|
|
|
int width = static_cast<int>(tensor.shape()[0]);
|
|
int height = static_cast<int>(tensor.shape()[1]);
|
|
int channel = static_cast<int>(tensor.shape()[2]);
|
|
for (int y = 0; y < height; ++y) {
|
|
for (int x = 0; x < width; ++x) {
|
|
for (int c = 0; c < channel; ++c) {
|
|
float value = preprocessing_get_4d(tensor, x, y, c, 0);
|
|
value = std::min(1.0f, std::max(0.0f, value));
|
|
image_data[(y * width + x) * channel + c] = static_cast<uint8_t>(std::round(value * 255.0f));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline sd::Tensor<float> gaussian_kernel_tensor(int kernel_size) {
|
|
sd::Tensor<float> kernel({kernel_size, kernel_size, 1, 1});
|
|
int ks_mid = kernel_size / 2;
|
|
float sigma = 1.4f;
|
|
float normal = 1.f / (2.0f * M_PI_ * std::pow(sigma, 2.0f));
|
|
for (int y = 0; y < kernel_size; ++y) {
|
|
float gx = static_cast<float>(-ks_mid + y);
|
|
for (int x = 0; x < kernel_size; ++x) {
|
|
float gy = static_cast<float>(-ks_mid + x);
|
|
float k = std::exp(-((gx * gx + gy * gy) / (2.0f * std::pow(sigma, 2.0f)))) * normal;
|
|
preprocessing_set_4d(kernel, k, x, y, 0, 0);
|
|
}
|
|
}
|
|
return kernel;
|
|
}
|
|
|
|
static inline sd::Tensor<float> convolve_tensor(const sd::Tensor<float>& input, const sd::Tensor<float>& kernel, int padding) {
|
|
GGML_ASSERT(input.dim() == 4);
|
|
GGML_ASSERT(kernel.dim() == 4);
|
|
GGML_ASSERT(input.shape()[3] == 1);
|
|
GGML_ASSERT(kernel.shape()[2] == 1);
|
|
GGML_ASSERT(kernel.shape()[3] == 1);
|
|
|
|
sd::Tensor<float> output(input.shape());
|
|
int64_t width = input.shape()[0];
|
|
int64_t height = input.shape()[1];
|
|
int64_t channels = input.shape()[2];
|
|
int64_t kernel_w = kernel.shape()[0];
|
|
int64_t kernel_h = kernel.shape()[1];
|
|
|
|
for (int64_t c = 0; c < channels; ++c) {
|
|
for (int64_t y = 0; y < height; ++y) {
|
|
for (int64_t x = 0; x < width; ++x) {
|
|
float sum = 0.0f;
|
|
for (int64_t ky = 0; ky < kernel_h; ++ky) {
|
|
int64_t iy = y + ky - padding;
|
|
if (iy < 0 || iy >= height) {
|
|
continue;
|
|
}
|
|
for (int64_t kx = 0; kx < kernel_w; ++kx) {
|
|
int64_t ix = x + kx - padding;
|
|
if (ix < 0 || ix >= width) {
|
|
continue;
|
|
}
|
|
sum += preprocessing_get_4d(input, ix, iy, c, 0) * preprocessing_get_4d(kernel, kx, ky, 0, 0);
|
|
}
|
|
}
|
|
preprocessing_set_4d(output, sum, x, y, c, 0);
|
|
}
|
|
}
|
|
}
|
|
return output;
|
|
}
|
|
|
|
static inline sd::Tensor<float> grayscale_tensor(const sd::Tensor<float>& rgb_img) {
|
|
GGML_ASSERT(rgb_img.dim() == 4);
|
|
GGML_ASSERT(rgb_img.shape()[2] >= 3);
|
|
sd::Tensor<float> grayscale({rgb_img.shape()[0], rgb_img.shape()[1], 1, rgb_img.shape()[3]});
|
|
for (int64_t iy = 0; iy < rgb_img.shape()[1]; ++iy) {
|
|
for (int64_t ix = 0; ix < rgb_img.shape()[0]; ++ix) {
|
|
float r = preprocessing_get_4d(rgb_img, ix, iy, 0, 0);
|
|
float g = preprocessing_get_4d(rgb_img, ix, iy, 1, 0);
|
|
float b = preprocessing_get_4d(rgb_img, ix, iy, 2, 0);
|
|
float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b;
|
|
preprocessing_set_4d(grayscale, gray, ix, iy, 0, 0);
|
|
}
|
|
}
|
|
return grayscale;
|
|
}
|
|
|
|
static inline sd::Tensor<float> tensor_hypot(const sd::Tensor<float>& x, const sd::Tensor<float>& y) {
|
|
sd::tensor_check_same_shape(x, y);
|
|
sd::Tensor<float> out(x.shape());
|
|
for (int64_t i = 0; i < out.numel(); ++i) {
|
|
out[i] = std::sqrt(x[i] * x[i] + y[i] * y[i]);
|
|
}
|
|
return out;
|
|
}
|
|
|
|
static inline sd::Tensor<float> tensor_arctan2(const sd::Tensor<float>& x, const sd::Tensor<float>& y) {
|
|
sd::tensor_check_same_shape(x, y);
|
|
sd::Tensor<float> out(x.shape());
|
|
for (int64_t i = 0; i < out.numel(); ++i) {
|
|
out[i] = std::atan2(y[i], x[i]);
|
|
}
|
|
return out;
|
|
}
|
|
|
|
static inline void normalize_tensor(sd::Tensor<float>* g) {
|
|
GGML_ASSERT(g != nullptr);
|
|
if (g->empty()) {
|
|
return;
|
|
}
|
|
float max_value = -std::numeric_limits<float>::infinity();
|
|
for (int64_t i = 0; i < g->numel(); ++i) {
|
|
max_value = std::max(max_value, (*g)[i]);
|
|
}
|
|
if (max_value == 0.0f || !std::isfinite(max_value)) {
|
|
return;
|
|
}
|
|
*g *= (1.0f / max_value);
|
|
}
|
|
|
|
static inline sd::Tensor<float> non_max_supression(const sd::Tensor<float>& G, const sd::Tensor<float>& D) {
|
|
GGML_ASSERT(G.shape() == D.shape());
|
|
sd::Tensor<float> result = sd::Tensor<float>::zeros(G.shape());
|
|
for (int64_t iy = 1; iy < result.shape()[1] - 1; ++iy) {
|
|
for (int64_t ix = 1; ix < result.shape()[0] - 1; ++ix) {
|
|
float angle = preprocessing_get_4d(D, ix, iy, 0, 0) * 180.0f / M_PI_;
|
|
angle = angle < 0.0f ? angle + 180.0f : angle;
|
|
float q = 1.0f;
|
|
float r = 1.0f;
|
|
|
|
if ((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180.0f)) {
|
|
q = preprocessing_get_4d(G, ix, iy + 1, 0, 0);
|
|
r = preprocessing_get_4d(G, ix, iy - 1, 0, 0);
|
|
} else if (22.5f >= angle && angle < 67.5f) {
|
|
q = preprocessing_get_4d(G, ix + 1, iy - 1, 0, 0);
|
|
r = preprocessing_get_4d(G, ix - 1, iy + 1, 0, 0);
|
|
} else if (67.5f >= angle && angle < 112.5f) {
|
|
q = preprocessing_get_4d(G, ix + 1, iy, 0, 0);
|
|
r = preprocessing_get_4d(G, ix - 1, iy, 0, 0);
|
|
} else if (112.5f >= angle && angle < 157.5f) {
|
|
q = preprocessing_get_4d(G, ix - 1, iy - 1, 0, 0);
|
|
r = preprocessing_get_4d(G, ix + 1, iy + 1, 0, 0);
|
|
}
|
|
|
|
float cur = preprocessing_get_4d(G, ix, iy, 0, 0);
|
|
preprocessing_set_4d(result, (cur >= q && cur >= r) ? cur : 0.0f, ix, iy, 0, 0);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static inline void threshold_hystersis(sd::Tensor<float>* img, float high_threshold, float low_threshold, float weak, float strong) {
|
|
GGML_ASSERT(img != nullptr);
|
|
if (img->empty()) {
|
|
return;
|
|
}
|
|
float max_value = -std::numeric_limits<float>::infinity();
|
|
for (int64_t i = 0; i < img->numel(); ++i) {
|
|
max_value = std::max(max_value, (*img)[i]);
|
|
}
|
|
|
|
float ht = max_value * high_threshold;
|
|
float lt = ht * low_threshold;
|
|
for (int64_t i = 0; i < img->numel(); ++i) {
|
|
float img_v = (*img)[i];
|
|
if (img_v >= ht) {
|
|
(*img)[i] = strong;
|
|
} else if (img_v <= ht && img_v >= lt) {
|
|
(*img)[i] = weak;
|
|
}
|
|
}
|
|
|
|
for (int64_t iy = 0; iy < img->shape()[1]; ++iy) {
|
|
for (int64_t ix = 0; ix < img->shape()[0]; ++ix) {
|
|
if (!(ix >= 3 && ix <= img->shape()[0] - 3 && iy >= 3 && iy <= img->shape()[1] - 3)) {
|
|
preprocessing_set_4d(*img, 0.0f, ix, iy, 0, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (int64_t iy = 1; iy < img->shape()[1] - 1; ++iy) {
|
|
for (int64_t ix = 1; ix < img->shape()[0] - 1; ++ix) {
|
|
float imd_v = preprocessing_get_4d(*img, ix, iy, 0, 0);
|
|
if (imd_v == weak) {
|
|
bool has_strong_neighbor =
|
|
preprocessing_get_4d(*img, ix + 1, iy - 1, 0, 0) == strong ||
|
|
preprocessing_get_4d(*img, ix + 1, iy, 0, 0) == strong ||
|
|
preprocessing_get_4d(*img, ix, iy - 1, 0, 0) == strong ||
|
|
preprocessing_get_4d(*img, ix, iy + 1, 0, 0) == strong ||
|
|
preprocessing_get_4d(*img, ix - 1, iy - 1, 0, 0) == strong ||
|
|
preprocessing_get_4d(*img, ix - 1, iy, 0, 0) == strong;
|
|
preprocessing_set_4d(*img, has_strong_neighbor ? strong : 0.0f, ix, iy, 0, 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
|
|
float kX[9] = {
|
|
-1, 0, 1,
|
|
-2, 0, 2,
|
|
-1, 0, 1};
|
|
|
|
float kY[9] = {
|
|
1, 2, 1,
|
|
0, 0, 0,
|
|
-1, -2, -1};
|
|
|
|
sd::Tensor<float> gkernel = gaussian_kernel_tensor(5);
|
|
sd::Tensor<float> sf_kx({3, 3, 1, 1}, std::vector<float>(kX, kX + 9));
|
|
sd::Tensor<float> sf_ky({3, 3, 1, 1}, std::vector<float>(kY, kY + 9));
|
|
|
|
sd::Tensor<float> image = sd_image_to_preprocessing_tensor(img);
|
|
sd::Tensor<float> image_gray = grayscale_tensor(image);
|
|
image_gray = convolve_tensor(image_gray, gkernel, 2);
|
|
sd::Tensor<float> iX = convolve_tensor(image_gray, sf_kx, 1);
|
|
sd::Tensor<float> iY = convolve_tensor(image_gray, sf_ky, 1);
|
|
sd::Tensor<float> G = tensor_hypot(iX, iY);
|
|
normalize_tensor(&G);
|
|
sd::Tensor<float> theta = tensor_arctan2(iX, iY);
|
|
image_gray = non_max_supression(G, theta);
|
|
threshold_hystersis(&image_gray, high_threshold, low_threshold, weak, strong);
|
|
|
|
for (uint32_t iy = 0; iy < img.height; ++iy) {
|
|
for (uint32_t ix = 0; ix < img.width; ++ix) {
|
|
float gray = preprocessing_get_4d(image_gray, ix, iy, 0, 0);
|
|
gray = inverse ? 1.0f - gray : gray;
|
|
for (uint32_t c = 0; c < img.channel; ++c) {
|
|
preprocessing_set_4d(image, gray, ix, iy, c, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
preprocessing_tensor_to_sd_image(image, img.data);
|
|
return true;
|
|
}
|
|
|
|
#endif // __PREPROCESSING_HPP__
|