// DarthAffe 07.09.2023: Copied from https://github.com/DarthAffe/RGB.NET/blob/2e0754f474b82ed4d0cae5c6c44378d234f1321b/RGB.NET.Presets/Textures/Sampler/AverageByteSampler.cs

using System;
using System.Numerics;
using System.Runtime.CompilerServices;

namespace ScreenCapture.NET.Downscale;

/// <summary>
/// Represents a sampler that averages multiple byte-data entries.
/// </summary>
internal static class AverageByteSampler
{
    #region Constants

    // Number of uint lanes in one hardware vector; upper bound when reading the widened sum vectors lane by lane.
    private static readonly int INT_VECTOR_LENGTH = Vector<uint>.Count;

    #endregion

    #region Methods

    /// <summary>
    /// Averages every channel of the region described by <paramref name="info"/> and writes the result to <paramref name="pixelData"/>.
    /// </summary>
    /// <remarks>
    /// The number of channels per element is taken from the length of <paramref name="pixelData"/>.
    /// Each row returned by the indexer of <paramref name="info"/> is read in steps of that length
    /// (presumably a row holds <c>Width * pixelData.Length</c> bytes - verify against SamplerInfo).
    /// If the region or <paramref name="pixelData"/> is empty, nothing is written.
    /// </remarks>
    /// <param name="info">The region to sample (width, height and one byte-row per y).</param>
    /// <param name="pixelData">Receives one averaged byte per channel.</param>
    public static unsafe void Sample(in SamplerInfo<byte> info, in Span<byte> pixelData)
    {
        int count = info.Width * info.Height;
        int dataLength = pixelData.Length;

        // Nothing to average or nowhere to write to. The dataLength check also protects the division below.
        if ((count == 0) || (dataLength == 0)) return;

        Span<uint> sums = stackalloc uint[dataLength];

        int byteVectorLength = Vector<byte>.Count;
        int elementsPerVector = byteVectorLength / dataLength;
        int valuesPerVector = elementsPerVector * dataLength;

        if (Vector.IsHardwareAccelerated && (info.Height > 1) && (info.Width >= valuesPerVector) && (dataLength <= byteVectorLength))
        {
            int chunks = info.Width / elementsPerVector;

            // A byte vector widens into four uint vectors; each of them accumulates one quarter of the lanes.
            Vector<uint> sum1 = Vector<uint>.Zero;
            Vector<uint> sum2 = Vector<uint>.Zero;
            Vector<uint> sum3 = Vector<uint>.Zero;
            Vector<uint> sum4 = Vector<uint>.Zero;

            for (int y = 0; y < info.Height; y++)
            {
                ReadOnlySpan<byte> data = info[y];

                // Every load reads a full vector but only advances by valuesPerVector bytes.
                // Limit the number of vector loads so the last one never reads beyond the row;
                // whatever is left is summed by the scalar loop below.
                int rowChunks = data.Length >= byteVectorLength
                                    ? Math.Min(chunks, ((data.Length - byteVectorLength) / valuesPerVector) + 1)
                                    : 0;

                fixed (byte* colorPtr = data)
                {
                    byte* current = colorPtr;
                    for (int i = 0; i < rowChunks; i++)
                    {
                        Vector<byte> bytes = *(Vector<byte>*)current;
                        Vector.Widen(bytes, out Vector<ushort> short1, out Vector<ushort> short2);
                        Vector.Widen(short1, out Vector<uint> int1, out Vector<uint> int2);
                        Vector.Widen(short2, out Vector<uint> int3, out Vector<uint> int4);

                        sum1 = Vector.Add(sum1, int1);
                        sum2 = Vector.Add(sum2, int2);
                        sum3 = Vector.Add(sum3, int3);
                        sum4 = Vector.Add(sum4, int4);

                        current += valuesPerVector;
                    }
                }

                // Scalar tail: everything in this row that was not covered by a vector load.
                int offset = rowChunks * valuesPerVector;
                for (int i = offset; i < data.Length; i += dataLength)
                    for (int j = 0; j < sums.Length; j++)
                        sums[j] += data[i + j];
            }

            // Fold the lanes back into the per-channel sums. Lane k of the original byte vector belongs to
            // channel (k % dataLength); lanes at or beyond valuesPerVector are padding and are ignored.
            int value = 0;
            int sumIndex = 0;
            AddLanes(sum1, sums, ref sumIndex, ref value, valuesPerVector);
            AddLanes(sum2, sums, ref sumIndex, ref value, valuesPerVector);
            AddLanes(sum3, sums, ref sumIndex, ref value, valuesPerVector);
            AddLanes(sum4, sums, ref sumIndex, ref value, valuesPerVector);
        }
        else
        {
            for (int y = 0; y < info.Height; y++)
            {
                ReadOnlySpan<byte> data = info[y];
                for (int i = 0; i < data.Length; i += dataLength)
                    for (int j = 0; j < sums.Length; j++)
                        sums[j] += data[i + j];
            }
        }

        // Computed in long first: count * 255 overflows int for regions larger than ~8.4 million samples.
        float divisor = (long)count * byte.MaxValue;
        for (int i = 0; i < pixelData.Length; i++)
            pixelData[i] = (sums[i] / divisor).GetByteValueFromPercentage();
    }

    /// <summary>
    /// Adds the lanes of one accumulated vector to the per-channel sums, continuing at the given channel position.
    /// </summary>
    /// <param name="lanes">The accumulated lane sums.</param>
    /// <param name="sums">The per-channel sums; its length is the channel count.</param>
    /// <param name="sumIndex">The channel the next lane belongs to; wraps around at the channel count.</param>
    /// <param name="value">The number of lanes consumed so far across all vectors.</param>
    /// <param name="valuesPerVector">The number of lanes that carry data; later lanes are skipped.</param>
    private static void AddLanes(Vector<uint> lanes, Span<uint> sums, ref int sumIndex, ref int value, int valuesPerVector)
    {
        for (int j = 0; (j < INT_VECTOR_LENGTH) && (value < valuesPerVector); j++)
        {
            sums[sumIndex] += lanes[j];
            ++sumIndex;
            ++value;

            if (sumIndex >= sums.Length)
                sumIndex = 0;
        }
    }

    /// <summary>
    /// Converts a percentage in the range [0..1] to a byte; NaN maps to 0 and values outside the range are clamped.
    /// </summary>
    /// <param name="percentage">The percentage to convert.</param>
    /// <returns>255 for a percentage of 1 or more, otherwise the percentage multiplied by 256 and truncated.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static byte GetByteValueFromPercentage(this float percentage)
    {
        if (float.IsNaN(percentage)) return 0;

        percentage = percentage.Clamp(0, 1.0f);
        return (byte)(percentage >= 1.0f ? 255 : percentage * 256.0f);
    }

    /// <summary>
    /// Limits a value to the range [<paramref name="min"/>..<paramref name="max"/>].
    /// </summary>
    /// <param name="value">The value to clamp.</param>
    /// <param name="min">The lower bound.</param>
    /// <param name="max">The upper bound.</param>
    /// <returns>The clamped value.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float Clamp(this float value, float min, float max)
    {
        // ReSharper disable ConvertIfStatementToReturnStatement - I'm not sure why, but inlining this statement reduces performance by ~10%
        if (value < min) return min;
        if (value > max) return max;
        return value;
        // ReSharper restore ConvertIfStatementToReturnStatement
    }

    #endregion
}