From 78405c9543a77eb4c930b32fe591ec9edc69c986 Mon Sep 17 00:00:00 2001 From: DarthAffe Date: Mon, 30 Jun 2025 21:24:49 +0200 Subject: [PATCH] Added edit-mode --- StableDiffusion.NET/Helper/ImageHelper.cs | 35 +- StableDiffusion.NET/Models/DiffusionModel.cs | 515 +++++++++---------- StableDiffusion.NET/Native/Native.cs | 31 +- 3 files changed, 317 insertions(+), 264 deletions(-) diff --git a/StableDiffusion.NET/Helper/ImageHelper.cs b/StableDiffusion.NET/Helper/ImageHelper.cs index 1c38a31..f620439 100644 --- a/StableDiffusion.NET/Helper/ImageHelper.cs +++ b/StableDiffusion.NET/Helper/ImageHelper.cs @@ -28,12 +28,19 @@ internal static class ImageHelper return image; } - public static unsafe void Dispose(Native.sd_image_t image) + public static unsafe void Dispose(Native.sd_image_t image) => Marshal.FreeHGlobal((nint)image.data); + + public static unsafe Native.sd_image_t ToSdImage(this IImage image, out nint dataPtr) { - Marshal.FreeHGlobal((nint)image.data); + int sizeInBytes = image.SizeInBytes; + + dataPtr = Marshal.AllocHGlobal(sizeInBytes); + image.CopyTo(new Span((void*)dataPtr, sizeInBytes)); + + return image.ToSdImage((byte*)dataPtr); } - public static unsafe Native.sd_image_t ToSdImage(this IImage image, byte* pinnedReference) + public static unsafe Native.sd_image_t ToSdImage(this IImage image, byte* pinnedReference) => new() { width = (uint)image.Width, @@ -41,4 +48,26 @@ internal static class ImageHelper channel = (uint)image.ColorFormat.BytesPerPixel, data = pinnedReference }; + + public static unsafe Native.sd_image_t* ToSdImagePtr(this IImage image, out nint dataPtr) + { + int sizeInBytes = image.SizeInBytes; + + dataPtr = Marshal.AllocHGlobal(sizeInBytes); + image.CopyTo(new Span((void*)dataPtr, sizeInBytes)); + + return image.ToSdImagePtr((byte*)dataPtr); + } + + public static unsafe Native.sd_image_t* ToSdImagePtr(this IImage image, byte* pinnedReference) + { + Native.sd_image_t* nativeImage = (Native.sd_image_t*)Marshal.AllocHGlobal(sizeof(Native.sd_image_t)); + + nativeImage->width = (uint)image.Width; + nativeImage->height = (uint)image.Height; + nativeImage->channel = (uint)image.ColorFormat.BytesPerPixel; + nativeImage->data = pinnedReference; + + return nativeImage; + } } \ No newline at end of file diff --git a/StableDiffusion.NET/Models/DiffusionModel.cs b/StableDiffusion.NET/Models/DiffusionModel.cs index 385e2a6..9b72103 100644 --- a/StableDiffusion.NET/Models/DiffusionModel.cs +++ b/StableDiffusion.NET/Models/DiffusionModel.cs @@ -1,7 +1,8 @@ -using System; -using HPPH; -using System.Runtime.InteropServices; +using HPPH; using JetBrains.Annotations; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; namespace StableDiffusion.NET; @@ -82,121 +83,22 @@ public sealed unsafe class DiffusionModel : IDisposable parameter.Validate(); - Native.sd_image_t* result; - if (parameter.ControlNet.IsEnabled) + List ptrsToFree = []; + + try { - if (parameter.ControlNet.Image is not IImage controlNetImage) - controlNetImage = parameter.ControlNet.Image!.ConvertTo(); + NativeParameters nativeParameters = PrefillParameters(prompt, parameter); + SetControlNetParameters(ref nativeParameters, parameter, ptrsToFree); - fixed (byte* imagePtr = controlNetImage.ToRawArray()) - { - if (parameter.ControlNet.CannyPreprocess) - { - Native.sd_image_t nativeControlNetImage = new() - { - width = (uint)controlNetImage.Width, - height = (uint)controlNetImage.Height, - channel = (uint)controlNetImage.ColorFormat.BytesPerPixel, - data = Native.preprocess_canny(imagePtr, - parameter.Width, - parameter.Height, - parameter.ControlNet.CannyHighThreshold, - parameter.ControlNet.CannyLowThreshold, - parameter.ControlNet.CannyWeak, - parameter.ControlNet.CannyStrong, - parameter.ControlNet.CannyInverse) - }; + Native.sd_image_t* result = Txt2Img(nativeParameters); - result = Native.txt2img(_ctx, - prompt, - parameter.NegativePrompt, - parameter.ClipSkip, - parameter.CfgScale, - parameter.Guidance, - parameter.Eta, - parameter.Width, - parameter.Height, - parameter.SampleMethod, - parameter.SampleSteps, - parameter.Seed, - 1, - &nativeControlNetImage, - parameter.ControlNet.Strength, - parameter.PhotoMaker.StyleRatio, - parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory, - parameter.SkipLayers, - parameter.SkipLayers.Length, - parameter.SlgScale, - parameter.SkipLayerStart, - parameter.SkipLayerEnd); - - Marshal.FreeHGlobal((nint)nativeControlNetImage.data); - } - else - { - Native.sd_image_t nativeControlNetImage = new() - { - width = (uint)controlNetImage.Width, - height = (uint)controlNetImage.Height, - channel = (uint)controlNetImage.ColorFormat.BytesPerPixel, - data = imagePtr - }; - - result = Native.txt2img(_ctx, - prompt, - parameter.NegativePrompt, - parameter.ClipSkip, - parameter.CfgScale, - parameter.Guidance, - parameter.Eta, - parameter.Width, - parameter.Height, - parameter.SampleMethod, - parameter.SampleSteps, - parameter.Seed, - 1, - &nativeControlNetImage, - parameter.ControlNet.Strength, - parameter.PhotoMaker.StyleRatio, - parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory, - parameter.SkipLayers, - parameter.SkipLayers.Length, - parameter.SlgScale, - parameter.SkipLayerStart, - parameter.SkipLayerEnd); - } - } + return ImageHelper.ToImage(result); } - else + finally { - result = Native.txt2img(_ctx, - prompt, - parameter.NegativePrompt, - parameter.ClipSkip, - parameter.CfgScale, - parameter.Guidance, - parameter.Eta, - parameter.Width, - parameter.Height, - parameter.SampleMethod, - parameter.SampleSteps, - parameter.Seed, - 1, - null, - 0, - parameter.PhotoMaker.StyleRatio, - parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory, - parameter.SkipLayers, - parameter.SkipLayers.Length, - parameter.SlgScale, - parameter.SkipLayerStart, - parameter.SkipLayerEnd); + foreach (nint ptr in ptrsToFree) + Marshal.FreeHGlobal(ptr); } - - return ImageHelper.ToImage(result); } public IImage ImageToImage(string prompt, IImage image, DiffusionParameter? parameter = null) @@ -209,26 +111,11 @@ public sealed unsafe class DiffusionModel : IDisposable parameter.Validate(); - if (image is not IImage refImage) - refImage = image.ConvertTo(); - // DarthAffe 10.08.2024: Mask needs to be a 1 channel all max value image when it's not used - I really don't like this concept as it adds unnecessary allocations, but that's how it is :( Span maskBuffer = new byte[image.Width * image.Height]; maskBuffer.Fill(byte.MaxValue); - fixed (byte* maskPtr = maskBuffer) - { - Native.sd_image_t maskImage = new() - { - width = (uint)image.Width, - height = (uint)image.Height, - channel = 1, - data = maskPtr - }; - - fixed (byte* imagePtr = refImage.AsRefImage()) - return ImageToImage(prompt, refImage.ToSdImage(imagePtr), maskImage, parameter); - } + return InternalImageToImage(prompt, image, maskBuffer, parameter); } public IImage Inpaint(string prompt, IImage image, IImage mask, DiffusionParameter? parameter = null) @@ -240,13 +127,10 @@ public sealed unsafe class DiffusionModel : IDisposable ArgumentNullException.ThrowIfNull(image); ArgumentNullException.ThrowIfNull(mask); - if (image.Width != mask.Width) throw new ArgumentException("The mask needs to have the same with as the image.", nameof(mask)); - if (image.Height != mask.Height) throw new ArgumentException("The mask needs to have the same height as the image.", nameof(mask)); - parameter.Validate(); - if (image is not IImage refImage) - refImage = image.ConvertTo(); + if (image.Width != mask.Width) throw new ArgumentException("The mask needs to have the same with as the image.", nameof(mask)); + if (image.Height != mask.Height) throw new ArgumentException("The mask needs to have the same height as the image.", nameof(mask)); // DarthAffe 10.08.2024: HPPH does currently not support monochrome images, that's why we need to convert it here. We're going for the simple conversion as the source image is supposed to be monochrome anyway. Span maskBuffer = new byte[image.Width * image.Height]; @@ -257,152 +141,232 @@ public sealed unsafe class DiffusionModel : IDisposable maskBuffer[(image.Width * y) + x] = (byte)Math.Round((color.R + color.G + color.B) / 3.0); } - fixed (byte* maskPtr = maskBuffer) - { - Native.sd_image_t maskImage = new() - { - width = (uint)image.Width, - height = (uint)image.Height, - channel = 1, - data = maskPtr - }; - - fixed (byte* imagePtr = refImage.AsRefImage()) - return ImageToImage(prompt, refImage.ToSdImage(imagePtr), maskImage, parameter); - } - + return InternalImageToImage(prompt, image, maskBuffer, parameter); } - private IImage ImageToImage(string prompt, Native.sd_image_t image, Native.sd_image_t mask, DiffusionParameter parameter) + public IImage Edit(string prompt, IImage[] refImages, DiffusionParameter? parameter = null) { + parameter ??= GetDefaultParameter(); + ObjectDisposedException.ThrowIf(_disposed, this); ArgumentNullException.ThrowIfNull(prompt); + ArgumentNullException.ThrowIfNull(refImages); parameter.Validate(); - Native.sd_image_t* result; - if (parameter.ControlNet.IsEnabled) + List ptrsToFree = []; + + try { - if (parameter.ControlNet.Image is not IImage controlNetImage) - controlNetImage = parameter.ControlNet.Image!.ConvertTo(); + NativeParameters nativeParameters = PrefillParameters(prompt, parameter); + SetControlNetParameters(ref nativeParameters, parameter, ptrsToFree); - fixed (byte* imagePtr = controlNetImage.ToRawArray()) + Native.sd_image_t[] nativeRefImages = new Native.sd_image_t[refImages.Length]; + + for (int i = 0; i < refImages.Length; i++) { - if (parameter.ControlNet.CannyPreprocess) - { - Native.sd_image_t nativeControlNetImage = new() - { - width = (uint)controlNetImage.Width, - height = (uint)controlNetImage.Height, - channel = (uint)controlNetImage.ColorFormat.BytesPerPixel, - data = Native.preprocess_canny(imagePtr, - parameter.Width, - parameter.Height, - parameter.ControlNet.CannyHighThreshold, - parameter.ControlNet.CannyLowThreshold, - parameter.ControlNet.CannyWeak, - parameter.ControlNet.CannyStrong, - parameter.ControlNet.CannyInverse) - }; + IImage image = refImages[i]; + if (image is not IImage refImage) + refImage = image.ConvertTo(); - result = Native.img2img(_ctx, - image, - mask, - prompt, - parameter.NegativePrompt, - parameter.ClipSkip, - parameter.CfgScale, - parameter.Guidance, - parameter.Width, - parameter.Height, - parameter.SampleMethod, - parameter.SampleSteps, - parameter.Strength, - parameter.Seed, - 1, - &nativeControlNetImage, - parameter.ControlNet.Strength, - parameter.PhotoMaker.StyleRatio, - parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory, - parameter.SkipLayers, - parameter.SkipLayers.Length, - parameter.SlgScale, - parameter.SkipLayerStart, - parameter.SkipLayerEnd); + nativeRefImages[i] = refImage.ToSdImage(out nint dataPtr); + ptrsToFree.Add(dataPtr); + } - Marshal.FreeHGlobal((nint)nativeControlNetImage.data); - } - else - { - Native.sd_image_t nativeControlNetImage = new() - { - width = (uint)parameter.ControlNet.Image.Width, - height = (uint)parameter.ControlNet.Image.Height, - channel = (uint)parameter.ControlNet.Image.ColorFormat.BytesPerPixel, - data = imagePtr - }; + fixed (Native.sd_image_t* nativeRefImagesPtr = nativeRefImages) + { + nativeParameters.ref_images = nativeRefImagesPtr; + nativeParameters.ref_images_count = nativeRefImages.Length; - result = Native.img2img(_ctx, - image, - mask, - prompt, - parameter.NegativePrompt, - parameter.ClipSkip, - parameter.CfgScale, - parameter.Guidance, - parameter.Width, - parameter.Height, - parameter.SampleMethod, - parameter.SampleSteps, - parameter.Strength, - parameter.Seed, - 1, - &nativeControlNetImage, - parameter.ControlNet.Strength, - parameter.PhotoMaker.StyleRatio, - parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory, - parameter.SkipLayers, - parameter.SkipLayers.Length, - parameter.SlgScale, - parameter.SkipLayerStart, - parameter.SkipLayerEnd); - } + Native.sd_image_t* result = Edit(nativeParameters); + + return ImageHelper.ToImage(result); } } - else + finally { - result = Native.img2img(_ctx, - image, - mask, - prompt, - parameter.NegativePrompt, - parameter.ClipSkip, - parameter.CfgScale, - parameter.Guidance, - parameter.Width, - parameter.Height, - parameter.SampleMethod, - parameter.SampleSteps, - parameter.Strength, - parameter.Seed, - 1, - null, - 0, - parameter.PhotoMaker.StyleRatio, - parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory, - parameter.SkipLayers, - parameter.SkipLayers.Length, - parameter.SlgScale, - parameter.SkipLayerStart, - parameter.SkipLayerEnd); + foreach (nint ptr in ptrsToFree) + Marshal.FreeHGlobal(ptr); } - - return ImageHelper.ToImage(result); } + private Image InternalImageToImage(string prompt, IImage image, Span mask, DiffusionParameter parameter) + { + List ptrsToFree = []; + + try + { + NativeParameters nativeParameters = PrefillParameters(prompt, parameter); + SetControlNetParameters(ref nativeParameters, parameter, ptrsToFree); + + if (image is not IImage refImage) + refImage = image.ConvertTo(); + + nativeParameters.init_image = refImage.ToSdImage(out nint imagePtr); + ptrsToFree.Add(imagePtr); + + fixed (byte* maskPtr = mask) + { + Native.sd_image_t maskImage = new() + { + width = (uint)refImage.Width, + height = (uint)refImage.Height, + channel = 1, + data = maskPtr + }; + nativeParameters.mask_image = maskImage; + + Native.sd_image_t* result = Img2Img(nativeParameters); + + return ImageHelper.ToImage(result); + } + } + finally + { + foreach (nint ptr in ptrsToFree) + Marshal.FreeHGlobal(ptr); + } + } + + private static NativeParameters PrefillParameters(string prompt, DiffusionParameter parameter) + => new() + { + prompt = prompt, + negative_prompt = parameter.NegativePrompt, + clip_skip = parameter.ClipSkip, + cfg_scale = parameter.CfgScale, + guidance = parameter.Guidance, + eta = parameter.Eta, + width = parameter.Width, + height = parameter.Height, + sample_method = parameter.SampleMethod, + sample_steps = parameter.SampleSteps, + seed = parameter.Seed, + batch_count = 1, + control_cond = null, + control_strength = 0, + style_strength = parameter.PhotoMaker.StyleRatio, + normalize_input = parameter.PhotoMaker.NormalizeInput, + input_id_images_path = parameter.PhotoMaker.InputIdImageDirectory, + skip_layers = parameter.SkipLayers, + skip_layers_count = parameter.SkipLayers.Length, + slg_scale = parameter.SlgScale, + skip_layer_start = parameter.SkipLayerStart, + skip_layer_end = parameter.SkipLayerEnd, + strength = parameter.Strength, + }; + + private static void SetControlNetParameters(ref NativeParameters nativeParameters, DiffusionParameter parameter, List ptrsToFree) + { + if (!parameter.ControlNet.IsEnabled) return; + if (parameter.ControlNet.Image == null) return; + + if (parameter.ControlNet.Image is not IImage controlNetImage) + controlNetImage = parameter.ControlNet.Image!.ConvertTo(); + + Native.sd_image_t* nativeControlNetImage = controlNetImage.ToSdImagePtr(out nint controlNetImagePtr); + ptrsToFree.Add(controlNetImagePtr); + ptrsToFree.Add((nint)nativeControlNetImage); + + nativeParameters.control_cond = nativeControlNetImage; + nativeParameters.control_strength = parameter.ControlNet.Strength; + + if (parameter.ControlNet.CannyPreprocess) + { + nativeParameters.control_cond->data = Native.preprocess_canny(nativeParameters.control_cond->data, + parameter.Width, + parameter.Height, + parameter.ControlNet.CannyHighThreshold, + parameter.ControlNet.CannyLowThreshold, + parameter.ControlNet.CannyWeak, + parameter.ControlNet.CannyStrong, + parameter.ControlNet.CannyInverse); + ptrsToFree.Add((nint)nativeParameters.control_cond->data); + } + } + + private Native.sd_image_t* Txt2Img(NativeParameters parameter) + => Native.txt2img(_ctx, + parameter.prompt, + parameter.negative_prompt, + parameter.clip_skip, + parameter.cfg_scale, + parameter.guidance, + parameter.eta, + parameter.width, + parameter.height, + parameter.sample_method, + parameter.sample_steps, + parameter.seed, + parameter.batch_count, + parameter.control_cond, + parameter.control_strength, + parameter.style_strength, + parameter.normalize_input, + parameter.input_id_images_path, + parameter.skip_layers, + parameter.skip_layers_count, + parameter.slg_scale, + parameter.skip_layer_start, + parameter.skip_layer_end + ); + + private Native.sd_image_t* Img2Img(NativeParameters parameter) + => Native.img2img(_ctx, + parameter.init_image, + parameter.mask_image, + parameter.prompt, + parameter.negative_prompt, + parameter.clip_skip, + parameter.cfg_scale, + parameter.guidance, + parameter.width, + parameter.height, + parameter.sample_method, + parameter.sample_steps, + parameter.strength, + parameter.seed, + parameter.batch_count, + parameter.control_cond, + parameter.control_strength, + parameter.style_strength, + parameter.normalize_input, + parameter.input_id_images_path, + parameter.skip_layers, + parameter.skip_layers_count, + parameter.slg_scale, + parameter.skip_layer_start, + parameter.skip_layer_end + ); + + private Native.sd_image_t* Edit(NativeParameters parameter) + => Native.edit(_ctx, + parameter.ref_images, + parameter.ref_images_count, + parameter.prompt, + parameter.negative_prompt, + parameter.clip_skip, + parameter.cfg_scale, + parameter.guidance, + parameter.eta, + parameter.width, + parameter.height, + parameter.sample_method, + parameter.sample_steps, + parameter.strength, + parameter.seed, + parameter.batch_count, + parameter.control_cond, + parameter.control_strength, + parameter.style_strength, + parameter.normalize_input, + parameter.skip_layers, + parameter.skip_layers_count, + parameter.slg_scale, + parameter.skip_layer_start, + parameter.skip_layer_end + ); + public void Dispose() { if (_disposed) return; @@ -415,4 +379,37 @@ public sealed unsafe class DiffusionModel : IDisposable } #endregion + + private ref struct NativeParameters + { + internal string prompt; + internal string negative_prompt; + internal int clip_skip; + internal float cfg_scale; + internal float guidance; + internal float eta; + internal int width; + internal int height; + internal Sampler sample_method; + internal int sample_steps; + internal long seed; + internal int batch_count; + internal Native.sd_image_t* control_cond; + internal float control_strength; + internal float style_strength; + internal bool normalize_input; + internal string input_id_images_path; + internal int[] skip_layers; + internal int skip_layers_count; + internal float slg_scale; + internal float skip_layer_start; + internal float skip_layer_end; + + internal Native.sd_image_t init_image; + internal Native.sd_image_t mask_image; + + internal Native.sd_image_t* ref_images; + internal int ref_images_count; + internal float strength; + } } \ No newline at end of file diff --git a/StableDiffusion.NET/Native/Native.cs b/StableDiffusion.NET/Native/Native.cs index 0da04d2..8466440 100644 --- a/StableDiffusion.NET/Native/Native.cs +++ b/StableDiffusion.NET/Native/Native.cs @@ -94,7 +94,7 @@ internal unsafe partial class Native float style_strength, [MarshalAs(UnmanagedType.I1)] bool normalize_input, [MarshalAs(UnmanagedType.LPStr)] string input_id_images_path, - int[] skip_layers, + in int[] skip_layers, int skip_layers_count, float slg_scale, float skip_layer_start, @@ -121,7 +121,7 @@ internal unsafe partial class Native float style_strength, [MarshalAs(UnmanagedType.I1)] bool normalize_input, [MarshalAs(UnmanagedType.LPStr)] string input_id_images_path, - int[] skip_layers, + in int[] skip_layers, int skip_layers_count, float slg_scale, float skip_layer_start, @@ -143,6 +143,33 @@ internal unsafe partial class Native float strength, long seed); + [LibraryImport(LIB_NAME, EntryPoint = "edit")] + internal static partial sd_image_t* edit(sd_ctx_t* sd_ctx, + sd_image_t* ref_images, + int ref_images_count, + [MarshalAs(UnmanagedType.LPStr)] string prompt, + [MarshalAs(UnmanagedType.LPStr)] string negative_prompt, + int clip_skip, + float cfg_scale, + float guidance, + float eta, + int width, + int height, + sample_method_t sample_method, + int sample_steps, + float strength, + long seed, + int batch_count, + sd_image_t* control_cond, + float control_strength, + float style_strength, + [MarshalAs(UnmanagedType.I1)] bool normalize_input, + in int[] skip_layers, + int skip_layers_count, + float slg_scale, + float skip_layer_start, + float skip_layer_end); + [LibraryImport(LIB_NAME, EntryPoint = "new_upscaler_ctx")] internal static partial upscaler_ctx_t* new_upscaler_ctx([MarshalAs(UnmanagedType.LPStr)] string esrgan_path, int n_threads,