From f79b74efccf2c1b7f671177760313a7108f0cb2a Mon Sep 17 00:00:00 2001 From: DarthAffe Date: Fri, 3 Jan 2025 14:39:50 +0100 Subject: [PATCH 1/3] Added flash attention and slg layer skip parameters; added comments for parameters --- StableDiffusion.NET/Enums/Quantization.cs | 2 + .../DiffusionModelBuilderExtension.cs | 8 ++ StableDiffusion.NET/Models/DiffusionModel.cs | 45 +++++++++-- .../Models/Parameter/ControlNetParameter.cs | 32 ++++++++ .../Parameter/DiffusionModelParameter.cs | 80 ++++++++++++++++++- .../Models/Parameter/DiffusionParameter.cs | 60 ++++++++++++++ .../Extensions/DiffusionParameterExtension.cs | 28 +++++++ .../Interfaces/IDiffusionModelParameter.cs | 1 + .../Models/Parameter/PhotoMakerParameter.cs | 11 +++ .../Models/Parameter/UpscaleModelParameter.cs | 11 +++ StableDiffusion.NET/Native/Native.cs | 17 +++- 11 files changed, 284 insertions(+), 11 deletions(-) diff --git a/StableDiffusion.NET/Enums/Quantization.cs b/StableDiffusion.NET/Enums/Quantization.cs index 39e886a..2658c63 100644 --- a/StableDiffusion.NET/Enums/Quantization.cs +++ b/StableDiffusion.NET/Enums/Quantization.cs @@ -37,6 +37,8 @@ public enum Quantization Q4_0_4_4 = 31, Q4_0_4_8 = 32, Q4_0_8_8 = 33, + TQ1_0 = 34, + TQ2_0 = 35, Unspecified } \ No newline at end of file diff --git a/StableDiffusion.NET/Models/Builder/Extensions/DiffusionModelBuilderExtension.cs b/StableDiffusion.NET/Models/Builder/Extensions/DiffusionModelBuilderExtension.cs index e156e30..53ae229 100644 --- a/StableDiffusion.NET/Models/Builder/Extensions/DiffusionModelBuilderExtension.cs +++ b/StableDiffusion.NET/Models/Builder/Extensions/DiffusionModelBuilderExtension.cs @@ -119,4 +119,12 @@ public static class DiffusionModelBuilderExtension return builder; } + + public static T WithFlashAttention(this T builder, bool flashAttention = true) + where T : IDiffusionModelBuilder + { + builder.Parameter.FlashAttention = flashAttention; + + return builder; + } } \ No newline at end of file diff --git 
a/StableDiffusion.NET/Models/DiffusionModel.cs b/StableDiffusion.NET/Models/DiffusionModel.cs index cbb2e7d..e53b698 100644 --- a/StableDiffusion.NET/Models/DiffusionModel.cs +++ b/StableDiffusion.NET/Models/DiffusionModel.cs @@ -59,7 +59,8 @@ public sealed unsafe class DiffusionModel : IDisposable ModelParameter.Schedule, ModelParameter.KeepClipOnCPU, ModelParameter.KeepControlNetOnCPU, - ModelParameter.KeepVaeOnCPU); + ModelParameter.KeepVaeOnCPU, + ModelParameter.FlashAttention); if (_ctx == null) throw new NullReferenceException("Failed to initialize diffusion-model."); } @@ -122,7 +123,12 @@ public sealed unsafe class DiffusionModel : IDisposable parameter.ControlNet.Strength, parameter.PhotoMaker.StyleRatio, parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory); + parameter.PhotoMaker.InputIdImageDirectory, + parameter.SkipLayers, + parameter.SkipLayers.Length, + parameter.SlgScale, + parameter.SkipLayerStart, + parameter.SkipLayerEnd); Marshal.FreeHGlobal((nint)nativeControlNetImage.data); } @@ -152,7 +158,12 @@ public sealed unsafe class DiffusionModel : IDisposable parameter.ControlNet.Strength, parameter.PhotoMaker.StyleRatio, parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory); + parameter.PhotoMaker.InputIdImageDirectory, + parameter.SkipLayers, + parameter.SkipLayers.Length, + parameter.SlgScale, + parameter.SkipLayerStart, + parameter.SkipLayerEnd); } } } @@ -174,7 +185,12 @@ public sealed unsafe class DiffusionModel : IDisposable 0, parameter.PhotoMaker.StyleRatio, parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory); + parameter.PhotoMaker.InputIdImageDirectory, + parameter.SkipLayers, + parameter.SkipLayers.Length, + parameter.SlgScale, + parameter.SkipLayerStart, + parameter.SkipLayerEnd); } return ImageHelper.ToImage(result); @@ -246,7 +262,12 @@ public sealed unsafe class DiffusionModel : IDisposable parameter.ControlNet.Strength, 
parameter.PhotoMaker.StyleRatio, parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory); + parameter.PhotoMaker.InputIdImageDirectory, + parameter.SkipLayers, + parameter.SkipLayers.Length, + parameter.SlgScale, + parameter.SkipLayerStart, + parameter.SkipLayerEnd); Marshal.FreeHGlobal((nint)nativeControlNetImage.data); } @@ -278,7 +299,12 @@ public sealed unsafe class DiffusionModel : IDisposable parameter.ControlNet.Strength, parameter.PhotoMaker.StyleRatio, parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory); + parameter.PhotoMaker.InputIdImageDirectory, + parameter.SkipLayers, + parameter.SkipLayers.Length, + parameter.SlgScale, + parameter.SkipLayerStart, + parameter.SkipLayerEnd); } } } @@ -302,7 +328,12 @@ public sealed unsafe class DiffusionModel : IDisposable 0, parameter.PhotoMaker.StyleRatio, parameter.PhotoMaker.NormalizeInput, - parameter.PhotoMaker.InputIdImageDirectory); + parameter.PhotoMaker.InputIdImageDirectory, + parameter.SkipLayers, + parameter.SkipLayers.Length, + parameter.SlgScale, + parameter.SkipLayerStart, + parameter.SkipLayerEnd); } return ImageHelper.ToImage(result); diff --git a/StableDiffusion.NET/Models/Parameter/ControlNetParameter.cs b/StableDiffusion.NET/Models/Parameter/ControlNetParameter.cs index 9c89dd2..406435c 100644 --- a/StableDiffusion.NET/Models/Parameter/ControlNetParameter.cs +++ b/StableDiffusion.NET/Models/Parameter/ControlNetParameter.cs @@ -8,12 +8,44 @@ public sealed class ControlNetParameter { public bool IsEnabled => Image != null; + /// + /// image condition, control net + /// public IImage? 
Image { get; set; } = null; + + /// + /// strength to apply Control Net (default: 0.9) + /// 1.0 corresponds to full destruction of information in init image + /// public float Strength { get; set; } = 0.9f; + + /// + /// apply canny preprocessor (edge detection) + /// public bool CannyPreprocess { get; set; } = false; + + /// + /// + /// public float CannyHighThreshold { get; set; } = 0.08f; + + /// + /// + /// public float CannyLowThreshold { get; set; } = 0.08f; + + /// + /// + /// public float CannyWeak { get; set; } = 0.8f; + + /// + /// + /// public float CannyStrong { get; set; } = 1.0f; + + /// + /// + /// public bool CannyInverse { get; set; } = false; } \ No newline at end of file diff --git a/StableDiffusion.NET/Models/Parameter/DiffusionModelParameter.cs b/StableDiffusion.NET/Models/Parameter/DiffusionModelParameter.cs index 8a54a79..da59ebc 100644 --- a/StableDiffusion.NET/Models/Parameter/DiffusionModelParameter.cs +++ b/StableDiffusion.NET/Models/Parameter/DiffusionModelParameter.cs @@ -4,36 +4,114 @@ public sealed class DiffusionModelParameter : IDiffusionModelParameter, IQuantiz { public DiffusionModelType DiffusionModelType { get; set; } = DiffusionModelType.None; + /// + /// path to vae + /// public string VaePath { get; set; } = string.Empty; + + /// + /// path to taesd. 
Using Tiny AutoEncoder for fast decoding (low quality) + /// public string TaesdPath { get; set; } = string.Empty; + /// + /// lora model directory + /// public string LoraModelDirectory { get; set; } = string.Empty; + + /// + /// path to embeddings + /// public string EmbeddingsDirectory { get; set; } = string.Empty; + + /// + /// path to control net model + /// public string ControlNetPath { get; set; } = string.Empty; + /// + /// number of threads to use during computation (default: 1) + /// If threads = -1, then threads will be set to the number of CPU physical cores + /// public int ThreadCount { get; set; } = 1; + /// + /// + /// public bool VaeDecodeOnly { get; set; } = false; + + /// + /// process vae in tiles to reduce memory usage + /// public bool VaeTiling { get; set; } = false; + + /// + /// keep controlnet in cpu + /// public bool KeepControlNetOnCPU { get; set; } = false; + + /// + /// keep clip in cpu (for low vram) + /// public bool KeepClipOnCPU { get; set; } = false; + + /// + /// keep vae in cpu (for low vram) + /// public bool KeepVaeOnCPU { get; set; } = false; + /// + /// use flash attention in the diffusion model (for low vram) + /// Might lower quality, since it implies converting k and v to f16. + /// This might crash if it is not supported by the backend. 
+ /// + public bool FlashAttention { get; set; } = false; + + /// + /// RNG (default: Standard) + /// public RngType RngType { get; set; } = RngType.Standard; + + /// + /// Denoiser sigma schedule (default: Default) + /// public Schedule Schedule { get; set; } = Schedule.Default; + /// + /// + /// public Quantization Quantization { get; set; } = Quantization.Unspecified; // SD <= 3 only + /// + /// path to full model + /// public string ModelPath { get; set; } = string.Empty; + + /// + /// path to PHOTOMAKER stacked id embeddings + /// public string StackedIdEmbeddingsDirectory { get; set; } = string.Empty; // Flux & SD3.5 only + /// + /// path to the standalone diffusion model + /// public string DiffusionModelPath { get; set; } = string.Empty; + + /// + /// path to the clip-l text encoder + /// public string ClipLPath { get; set; } = string.Empty; + + /// + /// path to the t5xxl text encoder + /// public string T5xxlPath { get; set; } = string.Empty; - // SD3.5 only + /// + /// path to the clip-g text encoder + /// public string ClipGPath { get; set; } = string.Empty; } \ No newline at end of file diff --git a/StableDiffusion.NET/Models/Parameter/DiffusionParameter.cs b/StableDiffusion.NET/Models/Parameter/DiffusionParameter.cs index c6809e5..03d819b 100644 --- a/StableDiffusion.NET/Models/Parameter/DiffusionParameter.cs +++ b/StableDiffusion.NET/Models/Parameter/DiffusionParameter.cs @@ -12,22 +12,82 @@ public sealed class DiffusionParameter public static DiffusionParameter SD3_5Default => new() { Width = 1024, Height = 1024, CfgScale = 4.5f, Guidance = 1f, SampleSteps = 20, SampleMethod = Sampler.Euler }; public static DiffusionParameter FluxDefault => new() { Width = 1024, Height = 1024, CfgScale = 1, Guidance = 3.5f, SampleSteps = 20, SampleMethod = Sampler.Euler }; + /// + /// the negative prompt (default: ""); + /// public string NegativePrompt { get; set; } = string.Empty; + + /// + /// image width, in pixel space (default: 512) + /// public int Width 
{ get; set; } = 512; + + /// + /// image height, in pixel space (default: 512) + /// public int Height { get; set; } = 512; + + /// + /// sampling method (default: Euler_A) + /// public Sampler SampleMethod { get; set; } = Sampler.Euler_A; + + /// + /// number of sample steps (default: 25) + /// public int SampleSteps { get; set; } = 25; + + /// + /// RNG seed. use -1 for a random seed (default: -1) + /// public long Seed { get; set; } = -1; + + /// + /// strength for noising/unnoising (default: 0.7) + /// public float Strength { get; set; } = 0.7f; + + /// + /// ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1) + /// -1 represents unspecified, will be 1 for SD1.x, 2 for SD2.x + /// public int ClipSkip { get; set; } = -1; + /// + /// skip layer guidance (SLG) scale, only for DiT models: (default: 0) + /// 0 means disabled, a value of 2.5 is nice for sd3.5 medium + /// + public float SlgScale { get; set; } = 0f; + + /// + /// Layers to skip for SLG steps: (default: [7,8,9]) + /// + public int[] SkipLayers { get; set; } = [7, 8, 9]; + + /// + /// SLG enabling point: (default: 0.01) + /// + public float SkipLayerStart { get; set; } = 0.01f; + + /// + /// SLG disabling point: (default: 0.2) + /// + public float SkipLayerEnd { get; set; } = 0.2f; + public ControlNetParameter ControlNet { get; } = new(); // Stable Diffusion only + /// + /// unconditional guidance scale: (default: 7.5) + /// public float CfgScale { get; set; } = 7.5f; + public PhotoMakerParameter PhotoMaker { get; } = new(); // Flux only + /// + /// + /// public float Guidance { get; set; } = 3.5f; #endregion diff --git a/StableDiffusion.NET/Models/Parameter/Extensions/DiffusionParameterExtension.cs b/StableDiffusion.NET/Models/Parameter/Extensions/DiffusionParameterExtension.cs index 52d4bef..53f4b3e 100644 --- a/StableDiffusion.NET/Models/Parameter/Extensions/DiffusionParameterExtension.cs +++ 
b/StableDiffusion.NET/Models/Parameter/Extensions/DiffusionParameterExtension.cs @@ -64,6 +64,34 @@ public static class DiffusionParameterExtension return parameter; } + public static DiffusionParameter WithSlgScale(this DiffusionParameter parameter, float slgScale) + { + parameter.SlgScale = slgScale; + + return parameter; + } + + public static DiffusionParameter WithSkipLayers(this DiffusionParameter parameter, int[] layers) + { + parameter.SkipLayers = layers; + + return parameter; + } + + public static DiffusionParameter WithSkipLayerStart(this DiffusionParameter parameter, float skipLayerStart) + { + parameter.SkipLayerStart = skipLayerStart; + + return parameter; + } + + public static DiffusionParameter WithSkipLayerEnd(this DiffusionParameter parameter, float skipLayerEnd) + { + parameter.SkipLayerEnd = skipLayerEnd; + + return parameter; + } + public static DiffusionParameter WithControlNet(this DiffusionParameter parameter, IImage image, float? strength = null) { parameter.ControlNet.Image = image; diff --git a/StableDiffusion.NET/Models/Parameter/Interfaces/IDiffusionModelParameter.cs b/StableDiffusion.NET/Models/Parameter/Interfaces/IDiffusionModelParameter.cs index e18ed24..12569ed 100644 --- a/StableDiffusion.NET/Models/Parameter/Interfaces/IDiffusionModelParameter.cs +++ b/StableDiffusion.NET/Models/Parameter/Interfaces/IDiffusionModelParameter.cs @@ -16,6 +16,7 @@ public interface IDiffusionModelParameter bool KeepControlNetOnCPU { get; set; } bool KeepClipOnCPU { get; set; } bool KeepVaeOnCPU { get; set; } + bool FlashAttention { get; set; } RngType RngType { get; set; } Schedule Schedule { get; set; } diff --git a/StableDiffusion.NET/Models/Parameter/PhotoMakerParameter.cs b/StableDiffusion.NET/Models/Parameter/PhotoMakerParameter.cs index cab62fd..57466e1 100644 --- a/StableDiffusion.NET/Models/Parameter/PhotoMakerParameter.cs +++ b/StableDiffusion.NET/Models/Parameter/PhotoMakerParameter.cs @@ -5,7 +5,18 @@ namespace StableDiffusion.NET; 
[PublicAPI] public sealed class PhotoMakerParameter { + /// + /// path to PHOTOMAKER input id images dir + /// public string InputIdImageDirectory { get; set; } = string.Empty; + + /// + /// strength for keeping input identity (default: 20) + /// public float StyleRatio { get; set; } = 20f; + + /// + /// normalize PHOTOMAKER input id images + /// public bool NormalizeInput { get; set; } = false; } \ No newline at end of file diff --git a/StableDiffusion.NET/Models/Parameter/UpscaleModelParameter.cs b/StableDiffusion.NET/Models/Parameter/UpscaleModelParameter.cs index 023a817..17844de 100644 --- a/StableDiffusion.NET/Models/Parameter/UpscaleModelParameter.cs +++ b/StableDiffusion.NET/Models/Parameter/UpscaleModelParameter.cs @@ -5,8 +5,19 @@ namespace StableDiffusion.NET; [PublicAPI] public sealed class UpscaleModelParameter : IQuantizedModelParameter { + /// + /// path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now + /// public string ModelPath { get; set; } = string.Empty; + + /// + /// number of threads to use during computation (default: 1) + /// If threads = -1, then threads will be set to the number of CPU physical cores + /// public int ThreadCount { get; set; } = 1; + /// + /// + /// public Quantization Quantization { get; set; } = Quantization.F16; } \ No newline at end of file diff --git a/StableDiffusion.NET/Native/Native.cs b/StableDiffusion.NET/Native/Native.cs index 66f55ab..a532b48 100644 --- a/StableDiffusion.NET/Native/Native.cs +++ b/StableDiffusion.NET/Native/Native.cs @@ -69,7 +69,8 @@ internal unsafe partial class Native schedule_t s, [MarshalAs(UnmanagedType.I1)] bool keep_clip_on_cpu, [MarshalAs(UnmanagedType.I1)] bool keep_control_net_cpu, - [MarshalAs(UnmanagedType.I1)] bool keep_vae_on_cpu); + [MarshalAs(UnmanagedType.I1)] bool keep_vae_on_cpu, + [MarshalAs(UnmanagedType.I1)] bool diffusion_flash_attn); [LibraryImport(LIB_NAME, EntryPoint = "free_sd_ctx")] internal static partial void 
free_sd_ctx(sd_ctx_t* sd_ctx); @@ -91,7 +92,12 @@ internal unsafe partial class Native float control_strength, float style_strength, [MarshalAs(UnmanagedType.I1)] bool normalize_input, - [MarshalAs(UnmanagedType.LPStr)] string input_id_images_path); + [MarshalAs(UnmanagedType.LPStr)] string input_id_images_path, + int[] skip_layers, + int skip_layers_count, + float slg_scale, + float skip_layer_start, + float skip_layer_end); [LibraryImport(LIB_NAME, EntryPoint = "img2img")] internal static partial sd_image_t* img2img(sd_ctx_t* sd_ctx, @@ -112,7 +118,12 @@ internal unsafe partial class Native float control_strength, float style_strength, [MarshalAs(UnmanagedType.I1)] bool normalize_input, - [MarshalAs(UnmanagedType.LPStr)] string input_id_images_path); + [MarshalAs(UnmanagedType.LPStr)] string input_id_images_path, + int[] skip_layers, + int skip_layers_count, + float slg_scale, + float skip_layer_start, + float skip_layer_end); [LibraryImport(LIB_NAME, EntryPoint = "img2vid")] internal static partial sd_image_t* img2vid(sd_ctx_t* sd_ctx, From b174c2aeb60b7f2f3543d572a5eb09448cde2830 Mon Sep 17 00:00:00 2001 From: DarthAffe Date: Fri, 3 Jan 2025 14:56:19 +0100 Subject: [PATCH 2/3] Updated backend in example --- Examples/ImageCreationUI/ImageCreationUI.csproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Examples/ImageCreationUI/ImageCreationUI.csproj b/Examples/ImageCreationUI/ImageCreationUI.csproj index 1a68fce..1f97149 100644 --- a/Examples/ImageCreationUI/ImageCreationUI.csproj +++ b/Examples/ImageCreationUI/ImageCreationUI.csproj @@ -11,9 +11,9 @@ - - - + + + From b7e5ee7232ea5e3277e939becbc626f426f138fe Mon Sep 17 00:00:00 2001 From: DarthAffe Date: Fri, 3 Jan 2025 14:56:27 +0100 Subject: [PATCH 3/3] Added flash attention parameter to example --- Examples/ImageCreationUI/MainWindow.xaml | 5 +++++ Examples/ImageCreationUI/MainWindowViewModel.cs | 11 +++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git 
a/Examples/ImageCreationUI/MainWindow.xaml b/Examples/ImageCreationUI/MainWindow.xaml index 13aa12f..a549ca1 100644 --- a/Examples/ImageCreationUI/MainWindow.xaml +++ b/Examples/ImageCreationUI/MainWindow.xaml @@ -91,6 +91,11 @@