1
0
mirror of https://github.com/DarthAffe/RGB.NET.git synced 2025-12-12 17:48:31 +00:00

Added intrinsics to improve sampler performance

This commit is contained in:
Darth Affe 2022-09-02 14:42:58 +02:00
parent f17c18e5ec
commit 7b591445b6
5 changed files with 191 additions and 14 deletions

View File

@ -36,6 +36,7 @@
<IncludeSymbols>True</IncludeSymbols>
<DebugType>portable</DebugType>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">

View File

@ -1,4 +1,6 @@
using System;
using System.Numerics;
using System.Runtime.InteropServices;
namespace RGB.NET.Core;
@ -10,21 +12,68 @@ namespace RGB.NET.Core;
/// </remarks>
public class AverageColorSampler : ISampler<Color>
{
#region Constants
private const int VALUES_PER_COLOR = 4;
private static readonly int ELEMENTS_PER_VECTOR = Vector<float>.Count / VALUES_PER_COLOR;
private static readonly int VALUES_PER_VECTOR = ELEMENTS_PER_VECTOR * VALUES_PER_COLOR;
#endregion
#region Methods
/// <inheritdoc />
public void Sample(in SamplerInfo<Color> info, in Span<Color> pixelData)
public unsafe void Sample(in SamplerInfo<Color> info, in Span<Color> pixelData)
{
int count = info.Width * info.Height;
if (count == 0) return;
float a = 0, r = 0, g = 0, b = 0;
foreach (Color color in info.Data)
if (Vector.IsHardwareAccelerated && (info.Data.Length >= Vector<float>.Count))
{
a += color.A;
r += color.R;
g += color.G;
b += color.B;
int chunks = info.Data.Length / ELEMENTS_PER_VECTOR;
int missingElements = info.Data.Length - (chunks * ELEMENTS_PER_VECTOR);
Vector<float> sum = Vector<float>.Zero;
fixed (Color* colorPtr = &MemoryMarshal.GetReference(info.Data))
{
Color* current = colorPtr;
for (int i = 0; i < chunks; i++)
{
sum = Vector.Add(sum, *(Vector<float>*)current);
current += ELEMENTS_PER_VECTOR;
}
}
for (int i = 0; i < VALUES_PER_VECTOR; i += VALUES_PER_COLOR)
{
a += sum[i];
r += sum[i + 1];
g += sum[i + 2];
b += sum[i + 3];
}
for (int i = 0; i < missingElements; i++)
{
Color color = info.Data[^(i + 1)];
a += color.A;
r += color.R;
g += color.G;
b += color.B;
}
}
else
{
foreach (Color color in info.Data)
{
a += color.A;
r += color.R;
g += color.G;
b += color.B;
}
}
pixelData[0] = new Color(a / count, r / count, g / count, b / count);

View File

@ -36,6 +36,7 @@
<IncludeSymbols>True</IncludeSymbols>
<DebugType>portable</DebugType>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">

View File

@ -1,4 +1,6 @@
using System;
using System.Numerics;
using System.Runtime.InteropServices;
using RGB.NET.Core;
namespace RGB.NET.Presets.Textures.Sampler;
@ -8,10 +10,16 @@ namespace RGB.NET.Presets.Textures.Sampler;
/// </summary>
public class AverageByteSampler : ISampler<byte>
{
#region Constants
private static readonly int INT_VECTOR_LENGTH = Vector<uint>.Count;
#endregion
#region Methods
/// <inheritdoc />
public void Sample(in SamplerInfo<byte> info, in Span<byte> pixelData)
public unsafe void Sample(in SamplerInfo<byte> info, in Span<byte> pixelData)
{
int count = info.Width * info.Height;
if (count == 0) return;
@ -20,9 +28,92 @@ public class AverageByteSampler : ISampler<byte>
int dataLength = pixelData.Length;
Span<uint> sums = stackalloc uint[dataLength];
for (int i = 0; i < data.Length; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += data[i + j];
if (Vector.IsHardwareAccelerated && (data.Length >= Vector<byte>.Count) && (dataLength <= Vector<byte>.Count))
{
int elementsPerVector = Vector<byte>.Count / dataLength;
int valuesPerVector = elementsPerVector * dataLength;
int chunks = data.Length / valuesPerVector;
int missingElements = data.Length - (chunks * valuesPerVector);
Vector<uint> sum1 = Vector<uint>.Zero;
Vector<uint> sum2 = Vector<uint>.Zero;
Vector<uint> sum3 = Vector<uint>.Zero;
Vector<uint> sum4 = Vector<uint>.Zero;
fixed (byte* colorPtr = &MemoryMarshal.GetReference(data))
{
byte* current = colorPtr;
for (int i = 0; i < chunks; i++)
{
Vector<byte> bytes = *(Vector<byte>*)current;
Vector.Widen(bytes, out Vector<ushort> short1, out Vector<ushort> short2);
Vector.Widen(short1, out Vector<uint> int1, out Vector<uint> int2);
Vector.Widen(short2, out Vector<uint> int3, out Vector<uint> int4);
sum1 = Vector.Add(sum1, int1);
sum2 = Vector.Add(sum2, int2);
sum3 = Vector.Add(sum3, int3);
sum4 = Vector.Add(sum4, int4);
current += valuesPerVector;
}
}
int value = 0;
int sumIndex = 0;
for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++)
{
sums[sumIndex] += sum1[j];
++sumIndex;
++value;
if (sumIndex >= dataLength)
sumIndex = 0;
}
for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++)
{
sums[sumIndex] += sum2[j];
++sumIndex;
++value;
if (sumIndex >= dataLength)
sumIndex = 0;
}
for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++)
{
sums[sumIndex] += sum3[j];
++sumIndex;
++value;
if (sumIndex >= dataLength)
sumIndex = 0;
}
for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++)
{
sums[sumIndex] += sum4[j];
++sumIndex;
++value;
if (sumIndex >= dataLength)
sumIndex = 0;
}
int offset = chunks * valuesPerVector;
for (int i = 0; i < missingElements; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += data[offset + i + j];
}
else
{
for (int i = 0; i < data.Length; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += data[i + j];
}
float divisor = count * byte.MaxValue;
for (int i = 0; i < pixelData.Length; i++)

View File

@ -1,4 +1,6 @@
using System;
using System.Numerics;
using System.Runtime.InteropServices;
using RGB.NET.Core;
namespace RGB.NET.Presets.Textures.Sampler;
@ -11,7 +13,7 @@ public class AverageFloatSampler : ISampler<float>
#region Methods
/// <inheritdoc />
public void Sample(in SamplerInfo<float> info, in Span<float> pixelData)
public unsafe void Sample(in SamplerInfo<float> info, in Span<float> pixelData)
{
int count = info.Width * info.Height;
if (count == 0) return;
@ -20,9 +22,42 @@ public class AverageFloatSampler : ISampler<float>
int dataLength = pixelData.Length;
Span<float> sums = stackalloc float[dataLength];
for (int i = 0; i < data.Length; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += data[i + j];
if (Vector.IsHardwareAccelerated && (data.Length >= Vector<float>.Count) && (dataLength <= Vector<float>.Count))
{
int elementsPerVector = Vector<float>.Count / dataLength;
int valuesPerVector = elementsPerVector * dataLength;
int chunks = data.Length / valuesPerVector;
int missingElements = data.Length - (chunks * valuesPerVector);
Vector<float> sum = Vector<float>.Zero;
fixed (float* colorPtr = &MemoryMarshal.GetReference(data))
{
float* current = colorPtr;
for (int i = 0; i < chunks; i++)
{
sum = Vector.Add(sum, *(Vector<float>*)current);
current += valuesPerVector;
}
}
for (int i = 0; i < valuesPerVector; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += sum[i + j];
int offset = chunks * valuesPerVector;
for (int i = 0; i < missingElements; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += data[offset + i + j];
}
else
{
for (int i = 0; i < data.Length; i += dataLength)
for (int j = 0; j < sums.Length; j++)
sums[j] += data[i + j];
}
for (int i = 0; i < pixelData.Length; i++)
pixelData[i] = sums[i] / count;