diff --git a/RGB.NET.Core/RGB.NET.Core.csproj b/RGB.NET.Core/RGB.NET.Core.csproj index ea411e3..d1a9b91 100644 --- a/RGB.NET.Core/RGB.NET.Core.csproj +++ b/RGB.NET.Core/RGB.NET.Core.csproj @@ -36,6 +36,7 @@ True portable snupkg + true diff --git a/RGB.NET.Core/Rendering/Textures/Sampler/AverageColorSampler.cs b/RGB.NET.Core/Rendering/Textures/Sampler/AverageColorSampler.cs index 257c5d9..f93d212 100644 --- a/RGB.NET.Core/Rendering/Textures/Sampler/AverageColorSampler.cs +++ b/RGB.NET.Core/Rendering/Textures/Sampler/AverageColorSampler.cs @@ -1,4 +1,6 @@ using System; +using System.Numerics; +using System.Runtime.InteropServices; namespace RGB.NET.Core; @@ -10,21 +12,68 @@ namespace RGB.NET.Core; /// public class AverageColorSampler : ISampler { + #region Constants + + private const int VALUES_PER_COLOR = 4; + private static readonly int ELEMENTS_PER_VECTOR = Vector.Count / VALUES_PER_COLOR; + private static readonly int VALUES_PER_VECTOR = ELEMENTS_PER_VECTOR * VALUES_PER_COLOR; + + #endregion + #region Methods /// - public void Sample(in SamplerInfo info, in Span pixelData) + public unsafe void Sample(in SamplerInfo info, in Span pixelData) { int count = info.Width * info.Height; if (count == 0) return; float a = 0, r = 0, g = 0, b = 0; - foreach (Color color in info.Data) + + if (Vector.IsHardwareAccelerated && (info.Data.Length >= Vector.Count)) { - a += color.A; - r += color.R; - g += color.G; - b += color.B; + int chunks = info.Data.Length / ELEMENTS_PER_VECTOR; + int missingElements = info.Data.Length - (chunks * ELEMENTS_PER_VECTOR); + + Vector sum = Vector.Zero; + + fixed (Color* colorPtr = &MemoryMarshal.GetReference(info.Data)) + { + Color* current = colorPtr; + for (int i = 0; i < chunks; i++) + { + sum = Vector.Add(sum, *(Vector*)current); + current += ELEMENTS_PER_VECTOR; + } + } + + for (int i = 0; i < VALUES_PER_VECTOR; i += VALUES_PER_COLOR) + { + a += sum[i]; + r += sum[i + 1]; + g += sum[i + 2]; + b += sum[i + 3]; + } + + for (int i = 0; i < missingElements; i++) + { + Color color = info.Data[^(i + 1)]; + + a += color.A; + r += color.R; + g += color.G; + b += color.B; + } + } + else + { + foreach (Color color in info.Data) + { + a += color.A; + r += color.R; + g += color.G; + b += color.B; + } } pixelData[0] = new Color(a / count, r / count, g / count, b / count); diff --git a/RGB.NET.Presets/RGB.NET.Presets.csproj b/RGB.NET.Presets/RGB.NET.Presets.csproj index e561576..6133d0a 100644 --- a/RGB.NET.Presets/RGB.NET.Presets.csproj +++ b/RGB.NET.Presets/RGB.NET.Presets.csproj @@ -36,6 +36,7 @@ True portable snupkg + true diff --git a/RGB.NET.Presets/Textures/Sampler/AverageByteSampler.cs b/RGB.NET.Presets/Textures/Sampler/AverageByteSampler.cs index 4eea2c1..8d5b8b6 100644 --- a/RGB.NET.Presets/Textures/Sampler/AverageByteSampler.cs +++ b/RGB.NET.Presets/Textures/Sampler/AverageByteSampler.cs @@ -1,4 +1,6 @@ using System; +using System.Numerics; +using System.Runtime.InteropServices; using RGB.NET.Core; namespace RGB.NET.Presets.Textures.Sampler; @@ -8,10 +10,16 @@ namespace RGB.NET.Presets.Textures.Sampler; /// public class AverageByteSampler : ISampler { + #region Constants + + private static readonly int INT_VECTOR_LENGTH = Vector.Count; + + #endregion + #region Methods /// - public void Sample(in SamplerInfo info, in Span pixelData) + public unsafe void Sample(in SamplerInfo info, in Span pixelData) { int count = info.Width * info.Height; if (count == 0) return; @@ -20,9 +28,92 @@ public class AverageByteSampler : ISampler int dataLength = pixelData.Length; Span sums = stackalloc uint[dataLength]; - for (int i = 0; i < data.Length; i += dataLength) - for (int j = 0; j < sums.Length; j++) - sums[j] += data[i + j]; + + if (Vector.IsHardwareAccelerated && (data.Length >= Vector.Count) && (dataLength <= Vector.Count)) + { + int elementsPerVector = Vector.Count / dataLength; + int valuesPerVector = elementsPerVector * dataLength; + + int chunks = data.Length / valuesPerVector; + int missingElements = data.Length - (chunks * valuesPerVector); + + Vector sum1 = Vector.Zero; + Vector sum2 = Vector.Zero; + Vector sum3 = Vector.Zero; + Vector sum4 = Vector.Zero; + + fixed (byte* colorPtr = &MemoryMarshal.GetReference(data)) + { + byte* current = colorPtr; + for (int i = 0; i < chunks; i++) + { + Vector bytes = *(Vector*)current; + Vector.Widen(bytes, out Vector short1, out Vector short2); + Vector.Widen(short1, out Vector int1, out Vector int2); + Vector.Widen(short2, out Vector int3, out Vector int4); + + sum1 = Vector.Add(sum1, int1); + sum2 = Vector.Add(sum2, int2); + sum3 = Vector.Add(sum3, int3); + sum4 = Vector.Add(sum4, int4); + + current += valuesPerVector; + } + } + + int value = 0; + int sumIndex = 0; + for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++) + { + sums[sumIndex] += sum1[j]; + ++sumIndex; + ++value; + + if (sumIndex >= dataLength) + sumIndex = 0; + } + + for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++) + { + sums[sumIndex] += sum2[j]; + ++sumIndex; + ++value; + + if (sumIndex >= dataLength) + sumIndex = 0; + } + + for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++) + { + sums[sumIndex] += sum3[j]; + ++sumIndex; + ++value; + + if (sumIndex >= dataLength) + sumIndex = 0; + } + + for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++) + { + sums[sumIndex] += sum4[j]; + ++sumIndex; + ++value; + + if (sumIndex >= dataLength) + sumIndex = 0; + } + + int offset = chunks * valuesPerVector; + for (int i = 0; i < missingElements; i += dataLength) + for (int j = 0; j < sums.Length; j++) + sums[j] += data[offset + i + j]; + } + else + { + for (int i = 0; i < data.Length; i += dataLength) + for (int j = 0; j < sums.Length; j++) + sums[j] += data[i + j]; + } float divisor = count * byte.MaxValue; for (int i = 0; i < pixelData.Length; i++) diff --git a/RGB.NET.Presets/Textures/Sampler/AverageFloatSampler.cs b/RGB.NET.Presets/Textures/Sampler/AverageFloatSampler.cs index c9009a1..50e9563 100644 --- a/RGB.NET.Presets/Textures/Sampler/AverageFloatSampler.cs +++ b/RGB.NET.Presets/Textures/Sampler/AverageFloatSampler.cs @@ -1,4 +1,6 @@ using System; +using System.Numerics; +using System.Runtime.InteropServices; using RGB.NET.Core; namespace RGB.NET.Presets.Textures.Sampler; @@ -11,7 +13,7 @@ public class AverageFloatSampler : ISampler #region Methods /// - public void Sample(in SamplerInfo info, in Span pixelData) + public unsafe void Sample(in SamplerInfo info, in Span pixelData) { int count = info.Width * info.Height; if (count == 0) return; @@ -20,9 +22,42 @@ public class AverageFloatSampler : ISampler int dataLength = pixelData.Length; Span sums = stackalloc float[dataLength]; - for (int i = 0; i < data.Length; i += dataLength) - for (int j = 0; j < sums.Length; j++) - sums[j] += data[i + j]; + + if (Vector.IsHardwareAccelerated && (data.Length >= Vector.Count) && (dataLength <= Vector.Count)) + { + int elementsPerVector = Vector.Count / dataLength; + int valuesPerVector = elementsPerVector * dataLength; + + int chunks = data.Length / valuesPerVector; + int missingElements = data.Length - (chunks * valuesPerVector); + + Vector sum = Vector.Zero; + + fixed (float* colorPtr = &MemoryMarshal.GetReference(data)) + { + float* current = colorPtr; + for (int i = 0; i < chunks; i++) + { + sum = Vector.Add(sum, *(Vector*)current); + current += valuesPerVector; + } + } + + for (int i = 0; i < valuesPerVector; i += dataLength) + for (int j = 0; j < sums.Length; j++) + sums[j] += sum[i + j]; + + int offset = chunks * valuesPerVector; + for (int i = 0; i < missingElements; i += dataLength) + for (int j = 0; j < sums.Length; j++) + sums[j] += data[offset + i + j]; + } + else + { + for (int i = 0; i < data.Length; i += dataLength) + for (int j = 0; j < sums.Length; j++) + sums[j] += data[i + j]; + } for (int i = 0; i < pixelData.Length; i++) pixelData[i] = sums[i] / count;