fix: correct canny preprocessor (#861)

2026-02-04 10:53:34 +00:00 · 2025-10-13 11:02:35 -03:00 · 2025-10-13 11:02:35 -03:00 · 5436f6b814
commit 5436f6b814
parent 1c32fa03bc
3 changed files with 8 additions and 8 deletions
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -372,12 +372,14 @@ __STATIC_INLINE__ float sigmoid(float x) {

 // SPECIAL OPERATIONS WITH TENSORS

-__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input) {
+__STATIC_INLINE__ uint8_t* sd_tensor_to_image(struct ggml_tensor* input, uint8_t* image_data = nullptr) {
    int64_t width    = input->ne[0];
    int64_t height   = input->ne[1];
    int64_t channels = input->ne[2];
    GGML_ASSERT(channels == 3 && input->type == GGML_TYPE_F32);
-    uint8_t* image_data = (uint8_t*)malloc(width * height * channels);
+    if (image_data == nullptr) {
+        image_data = (uint8_t*)malloc(width * height * channels);
+    }
    for (int iy = 0; iy < height; iy++) {
        for (int ix = 0; ix < width; ix++) {
            for (int k = 0; k < channels; k++) {
--- a/preprocessing.hpp
+++ b/preprocessing.hpp
@@ -6,7 +6,7 @@

 void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
    struct ggml_init_params params;
-    params.mem_size                 = 20 * 1024 * 1024;  // 10
+    params.mem_size                 = 80 * input->ne[0] * input->ne[1];  // 20M for 512x512
    params.mem_buffer               = NULL;
    params.no_alloc                 = false;
    struct ggml_context* ctx0       = ggml_init(params);
@@ -164,7 +164,7 @@ void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float lo

 bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
    struct ggml_init_params params;
-    params.mem_size               = static_cast<size_t>(10 * 1024 * 1024);  // 10MB
+    params.mem_size               = static_cast<size_t>(40 * img.width * img.height);  // 10MB for 512x512
    params.mem_buffer             = NULL;
    params.no_alloc               = false;
    struct ggml_context* work_ctx = ggml_init(params);
@@ -218,9 +218,7 @@ bool preprocess_canny(sd_image_t img, float high_threshold, float low_threshold,
            ggml_tensor_set_f32(image, gray, ix, iy, 2);
        }
    }
-    uint8_t* output = sd_tensor_to_image(image);
-    free(img.data);
-    img.data = output;
+    sd_tensor_to_image(image, img.data);
    ggml_free(work_ctx);
    return true;
 }
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -1434,7 +1434,7 @@ public:
            int ne3;
            if (sd_version_is_qwen_image(version)) {
                ne2 = 1;
-                ne3 = C*x->ne[3];
+                ne3 = C * x->ne[3];
            } else {
                if (!use_tiny_autoencoder) {
                    C *= 2;