Merge pull request #44 from DarthAffe/SD3_5

Sd3 5
This commit is contained in:
DarthAffe 2025-01-03 14:57:32 +01:00 committed by GitHub
commit 527149f3e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 301 additions and 16 deletions

View File

@ -11,9 +11,9 @@
<ItemGroup>
<PackageReference Include="HPPH.System.Drawing" Version="1.0.0" />
<PackageReference Include="StableDiffusion.NET.Backend.Cpu" Version="3.2.0" />
<PackageReference Include="StableDiffusion.NET.Backend.Cuda" Version="3.2.0" />
<PackageReference Include="StableDiffusion.NET.Backend.Rocm" Version="3.2.0" />
<PackageReference Include="StableDiffusion.NET.Backend.Cpu" Version="3.3.1" />
<PackageReference Include="StableDiffusion.NET.Backend.Cuda" Version="3.3.1" />
<PackageReference Include="StableDiffusion.NET.Backend.Rocm" Version="3.3.1" />
</ItemGroup>
<ItemGroup>

View File

@ -91,6 +91,11 @@
<Label Content="Schedule" />
<ComboBox ItemsSource="{Binding Source={StaticResource ScheduleDataSource}}" SelectedItem="{Binding Schedule}" />
<StackPanel Orientation="Horizontal" Margin="0,4,0,0">
<Label Content="Flash Attention" />
<CheckBox VerticalAlignment="Center" IsChecked="{Binding FlashAttention}" />
</StackPanel>
<Button Margin="0,8" Content="Load Model" Command="{Binding LoadModelCommand}" IsEnabled="{Binding IsReady}" />
<Separator />

View File

@ -71,6 +71,13 @@ public class MainWindowViewModel : INotifyPropertyChanged
set => SetProperty(ref _schedule, value);
}
// Backing field for the "Flash Attention" checkbox; starts enabled.
// NOTE(review): DiffusionModelParameter.FlashAttention defaults to false and its documentation warns that
// flash attention might crash on backends that do not support it - confirm that defaulting to true here is intended.
private bool _flashAttention = true;
// Whether flash attention is requested when a model is loaded; the value is handed to WithFlashAttention(...)
// by the model-loading code.
public bool FlashAttention
{
get => _flashAttention;
set => SetProperty(ref _flashAttention, value);
}
private string _prompt = string.Empty;
public string Prompt
{
@ -242,14 +249,14 @@ public class MainWindowViewModel : INotifyPropertyChanged
restoreDefaultParameters = _model?.ModelParameter.DiffusionModelType != DiffusionModelType.StableDiffusion;
LogLine($"Loading stable diffusion-model '{ModelPath}'");
_model = await Task.Run(() => ModelBuilder.StableDiffusion(ModelPath).WithMultithreading().WithVae(VaePath).WithSchedule(Schedule).Build());
_model = await Task.Run(() => ModelBuilder.StableDiffusion(ModelPath).WithMultithreading().WithVae(VaePath).WithSchedule(Schedule).WithFlashAttention(FlashAttention).Build());
}
else if (IsFluxSelected)
{
restoreDefaultParameters = _model?.ModelParameter.DiffusionModelType != DiffusionModelType.Flux;
LogLine($"Loading flux-model '{DiffusionModelPath}'");
_model = await Task.Run(() => ModelBuilder.Flux(DiffusionModelPath, ClipLPath, T5xxlPath, VaePath).WithMultithreading().WithSchedule(Schedule).Build());
_model = await Task.Run(() => ModelBuilder.Flux(DiffusionModelPath, ClipLPath, T5xxlPath, VaePath).WithMultithreading().WithSchedule(Schedule).WithFlashAttention(FlashAttention).Build());
}
else
{

View File

@ -37,6 +37,8 @@ public enum Quantization
Q4_0_4_4 = 31,
Q4_0_4_8 = 32,
Q4_0_8_8 = 33,
TQ1_0 = 34,
TQ2_0 = 35,
Unspecified
}

View File

@ -119,4 +119,12 @@ public static class DiffusionModelBuilderExtension
return builder;
}
/// <summary>
/// Sets <c>FlashAttention</c> on the builder's parameter object and returns the same builder for chaining.
/// </summary>
/// <typeparam name="T">The concrete builder type.</typeparam>
/// <param name="builder">The builder whose parameter is modified.</param>
/// <param name="flashAttention">The value to assign; <c>true</c> when the argument is omitted.</param>
/// <returns>The <paramref name="builder"/> instance that was passed in.</returns>
public static T WithFlashAttention<T>(this T builder, bool flashAttention = true)
where T : IDiffusionModelBuilder
{
builder.Parameter.FlashAttention = flashAttention;
return builder;
}
}

View File

@ -59,7 +59,8 @@ public sealed unsafe class DiffusionModel : IDisposable
ModelParameter.Schedule,
ModelParameter.KeepClipOnCPU,
ModelParameter.KeepControlNetOnCPU,
ModelParameter.KeepVaeOnCPU);
ModelParameter.KeepVaeOnCPU,
ModelParameter.FlashAttention);
if (_ctx == null) throw new NullReferenceException("Failed to initialize diffusion-model.");
}
@ -122,7 +123,12 @@ public sealed unsafe class DiffusionModel : IDisposable
parameter.ControlNet.Strength,
parameter.PhotoMaker.StyleRatio,
parameter.PhotoMaker.NormalizeInput,
parameter.PhotoMaker.InputIdImageDirectory);
parameter.PhotoMaker.InputIdImageDirectory,
parameter.SkipLayers,
parameter.SkipLayers.Length,
parameter.SlgScale,
parameter.SkipLayerStart,
parameter.SkipLayerEnd);
Marshal.FreeHGlobal((nint)nativeControlNetImage.data);
}
@ -152,7 +158,12 @@ public sealed unsafe class DiffusionModel : IDisposable
parameter.ControlNet.Strength,
parameter.PhotoMaker.StyleRatio,
parameter.PhotoMaker.NormalizeInput,
parameter.PhotoMaker.InputIdImageDirectory);
parameter.PhotoMaker.InputIdImageDirectory,
parameter.SkipLayers,
parameter.SkipLayers.Length,
parameter.SlgScale,
parameter.SkipLayerStart,
parameter.SkipLayerEnd);
}
}
}
@ -174,7 +185,12 @@ public sealed unsafe class DiffusionModel : IDisposable
0,
parameter.PhotoMaker.StyleRatio,
parameter.PhotoMaker.NormalizeInput,
parameter.PhotoMaker.InputIdImageDirectory);
parameter.PhotoMaker.InputIdImageDirectory,
parameter.SkipLayers,
parameter.SkipLayers.Length,
parameter.SlgScale,
parameter.SkipLayerStart,
parameter.SkipLayerEnd);
}
return ImageHelper.ToImage(result);
@ -246,7 +262,12 @@ public sealed unsafe class DiffusionModel : IDisposable
parameter.ControlNet.Strength,
parameter.PhotoMaker.StyleRatio,
parameter.PhotoMaker.NormalizeInput,
parameter.PhotoMaker.InputIdImageDirectory);
parameter.PhotoMaker.InputIdImageDirectory,
parameter.SkipLayers,
parameter.SkipLayers.Length,
parameter.SlgScale,
parameter.SkipLayerStart,
parameter.SkipLayerEnd);
Marshal.FreeHGlobal((nint)nativeControlNetImage.data);
}
@ -278,7 +299,12 @@ public sealed unsafe class DiffusionModel : IDisposable
parameter.ControlNet.Strength,
parameter.PhotoMaker.StyleRatio,
parameter.PhotoMaker.NormalizeInput,
parameter.PhotoMaker.InputIdImageDirectory);
parameter.PhotoMaker.InputIdImageDirectory,
parameter.SkipLayers,
parameter.SkipLayers.Length,
parameter.SlgScale,
parameter.SkipLayerStart,
parameter.SkipLayerEnd);
}
}
}
@ -302,7 +328,12 @@ public sealed unsafe class DiffusionModel : IDisposable
0,
parameter.PhotoMaker.StyleRatio,
parameter.PhotoMaker.NormalizeInput,
parameter.PhotoMaker.InputIdImageDirectory);
parameter.PhotoMaker.InputIdImageDirectory,
parameter.SkipLayers,
parameter.SkipLayers.Length,
parameter.SlgScale,
parameter.SkipLayerStart,
parameter.SkipLayerEnd);
}
return ImageHelper.ToImage(result);

View File

@ -8,12 +8,44 @@ public sealed class ControlNetParameter
{
public bool IsEnabled => Image != null;
/// <summary>
/// image condition, control net
/// </summary>
public IImage? Image { get; set; } = null;
/// <summary>
/// strength to apply Control Net (default: 0.9)
/// 1.0 corresponds to full destruction of information in init image
/// </summary>
public float Strength { get; set; } = 0.9f;
/// <summary>
/// apply canny preprocessor (edge detection)
/// </summary>
public bool CannyPreprocess { get; set; } = false;
/// <summary>
/// high threshold for the canny preprocessor (default: 0.08)
/// NOTE(review): presumably only used when CannyPreprocess is enabled - TODO confirm
/// </summary>
public float CannyHighThreshold { get; set; } = 0.08f;
/// <summary>
/// low threshold for the canny preprocessor (default: 0.08)
/// NOTE(review): presumably only used when CannyPreprocess is enabled - TODO confirm
/// </summary>
public float CannyLowThreshold { get; set; } = 0.08f;
/// <summary>
/// "weak" value for the canny preprocessor (default: 0.8)
/// NOTE(review): presumably the intensity assigned to weak edges - TODO confirm
/// </summary>
public float CannyWeak { get; set; } = 0.8f;
/// <summary>
/// "strong" value for the canny preprocessor (default: 1.0)
/// NOTE(review): presumably the intensity assigned to strong edges - TODO confirm
/// </summary>
public float CannyStrong { get; set; } = 1.0f;
/// <summary>
/// inverse flag for the canny preprocessor (default: false)
/// NOTE(review): presumably inverts the resulting edge image - TODO confirm
/// </summary>
public bool CannyInverse { get; set; } = false;
}

View File

@ -4,36 +4,114 @@ public sealed class DiffusionModelParameter : IDiffusionModelParameter, IQuantiz
{
public DiffusionModelType DiffusionModelType { get; set; } = DiffusionModelType.None;
/// <summary>
/// path to vae
/// </summary>
public string VaePath { get; set; } = string.Empty;
/// <summary>
/// path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
/// </summary>
public string TaesdPath { get; set; } = string.Empty;
/// <summary>
/// lora model directory
/// </summary>
public string LoraModelDirectory { get; set; } = string.Empty;
/// <summary>
/// path to embeddings
/// </summary>
public string EmbeddingsDirectory { get; set; } = string.Empty;
/// <summary>
/// path to control net model
/// </summary>
public string ControlNetPath { get; set; } = string.Empty;
/// <summary>
/// number of threads to use during computation (default: 1)
/// NOTE(review): -1 was previously documented here as "use the number of physical CPU cores", but the default
/// of this property is 1 and nothing visible in this class translates -1 - confirm that the native side handles it.
/// </summary>
public int ThreadCount { get; set; } = 1;
/// <summary>
/// vae decode-only flag (default: false)
/// NOTE(review): presumably restricts the vae to decoding, which would rule out workflows that need to encode
/// an input image - TODO confirm
/// </summary>
public bool VaeDecodeOnly { get; set; } = false;
/// <summary>
/// process vae in tiles to reduce memory usage
/// </summary>
public bool VaeTiling { get; set; } = false;
/// <summary>
/// keep controlnet in cpu
/// </summary>
public bool KeepControlNetOnCPU { get; set; } = false;
/// <summary>
/// keep clip in cpu (for low vram)
/// </summary>
public bool KeepClipOnCPU { get; set; } = false;
/// <summary>
/// keep vae in cpu (for low vram)
/// </summary>
public bool KeepVaeOnCPU { get; set; } = false;
/// <summary>
/// use flash attention in the diffusion model (for low vram)
/// Might lower quality, since it implies converting k and v to f16.
/// This might crash if it is not supported by the backend.
/// </summary>
public bool FlashAttention { get; set; } = false;
/// <summary>
/// RNG (default: Standard)
/// </summary>
public RngType RngType { get; set; } = RngType.Standard;
/// <summary>
/// Denoiser sigma schedule (default: Default)
/// </summary>
public Schedule Schedule { get; set; } = Schedule.Default;
/// <summary>
/// quantization setting for the model (default: Unspecified)
/// NOTE(review): presumably the weight type the model is converted to on load, with Unspecified keeping the
/// types stored in the model file - TODO confirm
/// </summary>
public Quantization Quantization { get; set; } = Quantization.Unspecified;
// SD <= 3 only
/// <summary>
/// path to full model
/// </summary>
public string ModelPath { get; set; } = string.Empty;
/// <summary>
/// path to PHOTOMAKER stacked id embeddings
/// </summary>
public string StackedIdEmbeddingsDirectory { get; set; } = string.Empty;
// Flux & SD3.5 only
/// <summary>
/// path to the standalone diffusion model
/// </summary>
public string DiffusionModelPath { get; set; } = string.Empty;
/// <summary>
/// path to the clip-l text encoder
/// </summary>
public string ClipLPath { get; set; } = string.Empty;
/// <summary>
/// path to the t5xxl text encoder
/// </summary>
public string T5xxlPath { get; set; } = string.Empty;
// SD3.5 only
/// <summary>
/// path to the clip-g text encoder
/// </summary>
public string ClipGPath { get; set; } = string.Empty;
}

View File

@ -12,22 +12,82 @@ public sealed class DiffusionParameter
public static DiffusionParameter SD3_5Default => new() { Width = 1024, Height = 1024, CfgScale = 4.5f, Guidance = 1f, SampleSteps = 20, SampleMethod = Sampler.Euler };
public static DiffusionParameter FluxDefault => new() { Width = 1024, Height = 1024, CfgScale = 1, Guidance = 3.5f, SampleSteps = 20, SampleMethod = Sampler.Euler };
/// <summary>
/// the negative prompt (default: "");
/// </summary>
public string NegativePrompt { get; set; } = string.Empty;
/// <summary>
/// image width, in pixel space (default: 512)
/// </summary>
public int Width { get; set; } = 512;
/// <summary>
/// image height, in pixel space (default: 512)
/// </summary>
public int Height { get; set; } = 512;
/// <summary>
/// sampling method (default: Euler_A)
/// </summary>
public Sampler SampleMethod { get; set; } = Sampler.Euler_A;
/// <summary>
/// number of sample steps (default: 25)
/// </summary>
public int SampleSteps { get; set; } = 25;
/// <summary>
/// RNG seed. use -1 for a random seed (default: -1)
/// </summary>
public long Seed { get; set; } = -1;
/// <summary>
/// strength for noising/unnoising (default: 0.7)
/// </summary>
public float Strength { get; set; } = 0.7f;
/// <summary>
/// ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
/// -1 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
/// </summary>
public int ClipSkip { get; set; } = -1;
/// <summary>
/// skip layer guidance (SLG) scale, only for DiT models: (default: 0)
/// 0 means disabled, a value of 2.5 is nice for sd3.5 medium
/// </summary>
public float SlgScale { get; set; } = 0f;
/// <summary>
/// Layers to skip for SLG steps: (default: [7,8,9])
/// </summary>
public int[] SkipLayers { get; set; } = [7, 8, 9];
/// <summary>
/// SLG enabling point: (default: 0.01)
/// </summary>
public float SkipLayerStart { get; set; } = 0.01f;
/// <summary>
/// SLG disabling point: (default: 0.2)
/// </summary>
public float SkipLayerEnd { get; set; } = 0.2f;
public ControlNetParameter ControlNet { get; } = new();
// Stable Diffusion only
/// <summary>
/// unconditional guidance scale: (default: 7.5)
/// </summary>
public float CfgScale { get; set; } = 7.5f;
public PhotoMakerParameter PhotoMaker { get; } = new();
// Flux only
/// <summary>
/// guidance value (default: 3.5)
/// NOTE(review): presumably the distilled guidance scale used by Flux models - TODO confirm
/// </summary>
public float Guidance { get; set; } = 3.5f;
#endregion

View File

@ -64,6 +64,34 @@ public static class DiffusionParameterExtension
return parameter;
}
/// <summary>
/// Sets the skip layer guidance (SLG) scale on <paramref name="parameter"/>.
/// </summary>
/// <param name="parameter">The parameter object to modify.</param>
/// <param name="slgScale">The value assigned to <c>SlgScale</c>; it is stored without validation.</param>
/// <returns>The same <paramref name="parameter"/> instance, to allow chaining.</returns>
public static DiffusionParameter WithSlgScale(this DiffusionParameter parameter, float slgScale)
{
parameter.SlgScale = slgScale;
return parameter;
}
/// <summary>
/// Sets the layers to skip during skip layer guidance (SLG) steps on <paramref name="parameter"/>.
/// </summary>
/// <param name="parameter">The parameter object to modify.</param>
/// <param name="layers">The layers to skip. Must not be <c>null</c>; the array is stored by reference, not copied.</param>
/// <returns>The same <paramref name="parameter"/> instance, to allow chaining.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="layers"/> is <c>null</c>.</exception>
public static DiffusionParameter WithSkipLayers(this DiffusionParameter parameter, int[] layers)
{
    // Fail here instead of later: the generation calls read SkipLayers.Length unconditionally,
    // so a null array would otherwise surface as a NullReferenceException far from the cause.
    System.ArgumentNullException.ThrowIfNull(layers);

    parameter.SkipLayers = layers;
    return parameter;
}
/// <summary>
/// Sets the skip layer guidance (SLG) enabling point on <paramref name="parameter"/>.
/// </summary>
/// <param name="parameter">The parameter object to modify.</param>
/// <param name="skipLayerStart">The value assigned to <c>SkipLayerStart</c>; it is stored without validation.</param>
/// <returns>The same <paramref name="parameter"/> instance, to allow chaining.</returns>
public static DiffusionParameter WithSkipLayerStart(this DiffusionParameter parameter, float skipLayerStart)
{
parameter.SkipLayerStart = skipLayerStart;
return parameter;
}
/// <summary>
/// Sets the skip layer guidance (SLG) disabling point on <paramref name="parameter"/>.
/// </summary>
/// <param name="parameter">The parameter object to modify.</param>
/// <param name="skipLayerEnd">The value assigned to <c>SkipLayerEnd</c>; it is stored without validation.</param>
/// <returns>The same <paramref name="parameter"/> instance, to allow chaining.</returns>
public static DiffusionParameter WithSkipLayerEnd(this DiffusionParameter parameter, float skipLayerEnd)
{
parameter.SkipLayerEnd = skipLayerEnd;
return parameter;
}
public static DiffusionParameter WithControlNet(this DiffusionParameter parameter, IImage image, float? strength = null)
{
parameter.ControlNet.Image = image;

View File

@ -16,6 +16,7 @@ public interface IDiffusionModelParameter
bool KeepControlNetOnCPU { get; set; }
bool KeepClipOnCPU { get; set; }
bool KeepVaeOnCPU { get; set; }
bool FlashAttention { get; set; }
RngType RngType { get; set; }
Schedule Schedule { get; set; }

View File

@ -5,7 +5,18 @@ namespace StableDiffusion.NET;
/// <summary>
/// PhotoMaker-related options. The three values are handed to the native generation calls together with
/// the other diffusion parameters.
/// </summary>
[PublicAPI]
public sealed class PhotoMakerParameter
{
/// <summary>
/// path to PHOTOMAKER input id images dir (default: empty)
/// </summary>
public string InputIdImageDirectory { get; set; } = string.Empty;
/// <summary>
/// strength for keeping input identity (default: 20)
/// </summary>
public float StyleRatio { get; set; } = 20f;
/// <summary>
/// normalize PHOTOMAKER input id images (default: false)
/// </summary>
public bool NormalizeInput { get; set; } = false;
}

View File

@ -5,8 +5,19 @@ namespace StableDiffusion.NET;
/// <summary>
/// Settings describing an upscale model: the model file, the thread count and the quantization.
/// </summary>
[PublicAPI]
public sealed class UpscaleModelParameter : IQuantizedModelParameter
{
/// <summary>
/// path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now
/// </summary>
public string ModelPath { get; set; } = string.Empty;
/// <summary>
/// number of threads to use during computation (default: 1)
/// NOTE(review): -1 was previously documented here as "use the number of physical CPU cores", but the default
/// of this property is 1 and nothing visible in this class translates -1 - confirm that the native side handles it.
/// </summary>
public int ThreadCount { get; set; } = 1;
/// <summary>
/// quantization setting for the upscale model (default: F16)
/// NOTE(review): presumably the weight type the model is loaded as - TODO confirm
/// </summary>
public Quantization Quantization { get; set; } = Quantization.F16;
}

View File

@ -69,7 +69,8 @@ internal unsafe partial class Native
schedule_t s,
[MarshalAs(UnmanagedType.I1)] bool keep_clip_on_cpu,
[MarshalAs(UnmanagedType.I1)] bool keep_control_net_cpu,
[MarshalAs(UnmanagedType.I1)] bool keep_vae_on_cpu);
[MarshalAs(UnmanagedType.I1)] bool keep_vae_on_cpu,
[MarshalAs(UnmanagedType.I1)] bool diffusion_flash_attn);
[LibraryImport(LIB_NAME, EntryPoint = "free_sd_ctx")]
internal static partial void free_sd_ctx(sd_ctx_t* sd_ctx);
@ -91,7 +92,12 @@ internal unsafe partial class Native
float control_strength,
float style_strength,
[MarshalAs(UnmanagedType.I1)] bool normalize_input,
[MarshalAs(UnmanagedType.LPStr)] string input_id_images_path);
[MarshalAs(UnmanagedType.LPStr)] string input_id_images_path,
int[] skip_layers,
int skip_layers_count,
float slg_scale,
float skip_layer_start,
float skip_layer_end);
[LibraryImport(LIB_NAME, EntryPoint = "img2img")]
internal static partial sd_image_t* img2img(sd_ctx_t* sd_ctx,
@ -112,7 +118,12 @@ internal unsafe partial class Native
float control_strength,
float style_strength,
[MarshalAs(UnmanagedType.I1)] bool normalize_input,
[MarshalAs(UnmanagedType.LPStr)] string input_id_images_path);
[MarshalAs(UnmanagedType.LPStr)] string input_id_images_path,
int[] skip_layers,
int skip_layers_count,
float slg_scale,
float skip_layer_start,
float skip_layer_end);
[LibraryImport(LIB_NAME, EntryPoint = "img2vid")]
internal static partial sd_image_t* img2vid(sd_ctx_t* sd_ctx,