From 3f9dd933d3a228f0e34e3aabc6d6847a07ae5d11 Mon Sep 17 00:00:00 2001 From: Darth Affe Date: Mon, 8 Jul 2024 23:40:18 +0200 Subject: [PATCH] Refactored conversion and implemented narrowing and widening --- HPPH.Test/ConvertTests.cs | 186 +++++++++++++++-- HPPH/PixelHelper.Convert.cs | 404 +++++++++++++++++++++++++++--------- 2 files changed, 478 insertions(+), 112 deletions(-) diff --git a/HPPH.Test/ConvertTests.cs b/HPPH.Test/ConvertTests.cs index 2376953..b05e767 100644 --- a/HPPH.Test/ConvertTests.cs +++ b/HPPH.Test/ConvertTests.cs @@ -24,10 +24,10 @@ public class ConvertTests ColorRGB reference = referenceData[i]; ColorBGR test = result[i]; - Assert.AreEqual(reference.R, test.R, "R differs"); - Assert.AreEqual(reference.G, test.G, "G differs"); - Assert.AreEqual(reference.B, test.B, "B differs"); - Assert.AreEqual(reference.A, test.A, "A differs"); + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); } } } @@ -51,10 +51,10 @@ public class ConvertTests ColorRGBA reference = referenceData[i]; ColorARGB test = result[i]; - Assert.AreEqual(reference.R, test.R, "R differs"); - Assert.AreEqual(reference.G, test.G, "G differs"); - Assert.AreEqual(reference.B, test.B, "B differs"); - Assert.AreEqual(reference.A, test.A, "A differs"); + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); } } } @@ -78,10 +78,172 @@ public class ConvertTests ColorRGBA reference = referenceData[i]; ColorBGRA test = result[i]; - Assert.AreEqual(reference.R, test.R, "R differs"); - Assert.AreEqual(reference.G, test.G, "G differs"); - Assert.AreEqual(reference.B, test.B, "B differs"); - Assert.AreEqual(reference.A, test.A, "A differs"); + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); + } + } + } + + [TestMethod] + public void ConvertNarrow4ByteRGBAToRGB() + { + foreach (string image in GetTestImages()) + { + ColorRGBA[] data = ImageHelper.Get4ByteColorsFromImage(image); + ReadOnlySpan referenceData = data; + + Span sourceData = new ColorRGBA[referenceData.Length]; + referenceData.CopyTo(sourceData); + + Span result = PixelHelper.Convert(sourceData); + + Assert.AreEqual(referenceData.Length, result.Length); + for (int i = 0; i < referenceData.Length; i++) + { + ColorRGBA reference = referenceData[i]; + ColorRGB test = result[i]; + + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); + } + } + } + + [TestMethod] + public void ConvertNarrow4ByteRGBAToBGR() + { + foreach (string image in GetTestImages()) + { + ColorRGBA[] data = ImageHelper.Get4ByteColorsFromImage(image); + ReadOnlySpan referenceData = data; + + Span sourceData = new ColorRGBA[referenceData.Length]; + referenceData.CopyTo(sourceData); + + Span result = PixelHelper.Convert(sourceData); + + Assert.AreEqual(referenceData.Length, result.Length); + for (int i = 0; i < referenceData.Length; i++) + { + ColorRGBA reference = referenceData[i]; + ColorBGR test = result[i]; + + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); + } + } + } + + [TestMethod] + public void ConvertWiden3ByteRGBToRGBA() + { + foreach (string image in GetTestImages()) + { + ColorRGB[] data = ImageHelper.Get3ByteColorsFromImage(image); + ReadOnlySpan referenceData = data; + + Span sourceData = new ColorRGB[referenceData.Length]; + referenceData.CopyTo(sourceData); + + Span result = PixelHelper.Convert(sourceData); + + Assert.AreEqual(referenceData.Length, result.Length); + for (int i = 0; i < referenceData.Length; i++) + { + ColorRGB reference = referenceData[i]; + ColorRGBA test = result[i]; + + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); + } + } + } + + [TestMethod] + public void ConvertWiden3ByteRGBToARGB() + { + foreach (string image in GetTestImages()) + { + ColorRGB[] data = ImageHelper.Get3ByteColorsFromImage(image); + ReadOnlySpan referenceData = data; + + Span sourceData = new ColorRGB[referenceData.Length]; + referenceData.CopyTo(sourceData); + + Span result = PixelHelper.Convert(sourceData); + + Assert.AreEqual(referenceData.Length, result.Length); + for (int i = 0; i < referenceData.Length; i++) + { + ColorRGB reference = referenceData[i]; + ColorARGB test = result[i]; + + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); + } + } + } + + [TestMethod] + public void ConvertWiden3ByteRGBToBGRA() + { + foreach (string image in GetTestImages()) + { + ColorRGB[] data = ImageHelper.Get3ByteColorsFromImage(image); + ReadOnlySpan referenceData = data; + + Span sourceData = new ColorRGB[referenceData.Length]; + referenceData.CopyTo(sourceData); + + Span result = PixelHelper.Convert(sourceData); + + Assert.AreEqual(referenceData.Length, result.Length); + for (int i = 0; i < referenceData.Length; i++) + { + ColorRGB reference = referenceData[i]; + ColorBGRA test = result[i]; + + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); + } + } + } + + [TestMethod] + public void ConvertWiden3ByteRGBToABGR() + { + foreach (string image in GetTestImages()) + { + ColorRGB[] data = ImageHelper.Get3ByteColorsFromImage(image).SkipLast(1).ToArray(); + ReadOnlySpan referenceData = data; + + Span sourceData = new ColorRGB[referenceData.Length]; + referenceData.CopyTo(sourceData); + + Span result = PixelHelper.Convert(sourceData); + + Assert.AreEqual(referenceData.Length, result.Length); + for (int i = 0; i < referenceData.Length; i++) + { + ColorRGB reference = referenceData[i]; + ColorABGR test = result[i]; + + Assert.AreEqual(reference.R, test.R, $"R differs at index {i}"); + Assert.AreEqual(reference.G, test.G, $"G differs at index {i}"); + Assert.AreEqual(reference.B, test.B, $"B differs at index {i}"); + Assert.AreEqual(reference.A, test.A, $"A differs at index {i}"); } } } diff --git a/HPPH/PixelHelper.Convert.cs b/HPPH/PixelHelper.Convert.cs index 0a0201b..a88974e 100644 --- a/HPPH/PixelHelper.Convert.cs +++ b/HPPH/PixelHelper.Convert.cs @@ -7,110 +7,84 @@ public static unsafe partial class PixelHelper { #region Methods - public static Span Convert(ReadOnlySpan data) + #region In-Place + + public static Span ConvertInPlace(Span colors) where TSource : struct, IColor where TTarget : struct, IColor { - if (data == null) throw new ArgumentNullException(nameof(data)); + if (colors == null) throw new ArgumentNullException(nameof(colors)); + if (colors.Length == 0) return MemoryMarshal.Cast(colors); - Span dataCopy = new TSource[data.Length]; - data.CopyTo(dataCopy); + IColorFormat sourceFormat = TSource.ColorFormat; + IColorFormat targetFormat = TTarget.ColorFormat; - return Convert(dataCopy); + if (sourceFormat == targetFormat) return MemoryMarshal.Cast(colors); + if (sourceFormat.BytesPerPixel != targetFormat.BytesPerPixel) throw new NotSupportedException("In-place conversion requires the same BPP for source and target."); + + Span data = MemoryMarshal.AsBytes(colors); + Convert(data, data, sourceFormat, targetFormat); + + return MemoryMarshal.Cast(data); } - public static Span Convert(Span data) + #endregion + + #region Allocating + + public static TTarget[] Convert(Span colors) where TSource : struct, IColor where TTarget : struct, IColor { - if (data == null) throw new ArgumentNullException(nameof(data)); + if (colors == null) throw new ArgumentNullException(nameof(colors)); - Convert(MemoryMarshal.AsBytes(data), TSource.ColorFormat, TTarget.ColorFormat); - - return MemoryMarshal.Cast(data); + TTarget[] buffer = new TTarget[colors.Length]; + Convert(colors, buffer.AsSpan()); + return buffer; } - internal static void Convert(Span data, IColorFormat sourceFormat, IColorFormat targetFormat) + public static TTarget[] Convert(ReadOnlySpan colors) + where TSource : struct, IColor + where TTarget : struct, IColor { - if (data == null) throw new ArgumentNullException(nameof(data)); - ArgumentNullException.ThrowIfNull(sourceFormat); - ArgumentNullException.ThrowIfNull(targetFormat); + if (colors == null) throw new ArgumentNullException(nameof(colors)); - if (sourceFormat == targetFormat) return; - - if (sourceFormat.BytesPerPixel == targetFormat.BytesPerPixel) - ConvertEqualBpp(data, sourceFormat, targetFormat); - else if ((sourceFormat.BytesPerPixel == 3) && (targetFormat.BytesPerPixel == 4)) - ConvertWiden3To4Bytes(data, sourceFormat, targetFormat); - else if ((sourceFormat.BytesPerPixel == 4) && (targetFormat.BytesPerPixel == 3)) - ConvertNarrow4To3Bytes(data, sourceFormat, targetFormat); - else - throw new NotSupportedException("Data is not of a supported valid color-type."); + TTarget[] buffer = new TTarget[colors.Length]; + Convert(colors, buffer.AsSpan()); + return buffer; } - private static void ConvertEqualBpp(Span data, IColorFormat sourceFormat, IColorFormat targetFormat) + public static void Convert(ReadOnlySpan source, Span target) + where TSource : struct, IColor + where TTarget : struct, IColor { - ReadOnlySpan sourceMapping = sourceFormat.ByteMapping; - ReadOnlySpan targetMapping = targetFormat.ByteMapping; + if (source == null) throw new ArgumentNullException(nameof(source)); + if (target == null) throw new ArgumentNullException(nameof(target)); + if (target.Length < source.Length) throw new ArgumentException($"Target-buffer is not big enough. {target.Length} < {source.Length}", nameof(target)); + + Convert(MemoryMarshal.AsBytes(source), MemoryMarshal.AsBytes(target), TSource.ColorFormat, TTarget.ColorFormat); + } + + private static void Convert(ReadOnlySpan source, Span target, IColorFormat sourceFormat, IColorFormat targetFormat) + { + if (source.Length == 0) return; switch (sourceFormat.BytesPerPixel) { - case 3: - ReadOnlySpan mapping3 = [targetMapping[sourceMapping[0]], targetMapping[sourceMapping[1]], targetMapping[sourceMapping[2]]]; - ReadOnlySpan mask3 = - [ - mapping3[0], - mapping3[1], - mapping3[2], - - (byte)(mapping3[0] + 3), - (byte)(mapping3[1] + 3), - (byte)(mapping3[2] + 3), - - (byte)(mapping3[0] + 6), - (byte)(mapping3[1] + 6), - (byte)(mapping3[2] + 6), - - (byte)(mapping3[0] + 9), - (byte)(mapping3[1] + 9), - (byte)(mapping3[2] + 9), - - (byte)(mapping3[0] + 12), - (byte)(mapping3[1] + 12), - (byte)(mapping3[2] + 12), - - 15 - ]; - - ConvertEqualBpp(data, mask3, 3); + case 3 when (targetFormat.BytesPerPixel == 3): + Convert3Bytes(source, target, sourceFormat, targetFormat); break; - case 4: - ReadOnlySpan mapping4 = [targetMapping[sourceMapping[0]], targetMapping[sourceMapping[1]], targetMapping[sourceMapping[2]], targetMapping[sourceMapping[3]]]; - ReadOnlySpan mask4 = - [ - mapping4[0], - mapping4[1], - mapping4[2], - mapping4[3], + case 4 when (targetFormat.BytesPerPixel == 4): + Convert4Bytes(source, target, sourceFormat, targetFormat); + break; - (byte)(mapping4[0] + 4), - (byte)(mapping4[1] + 4), - (byte)(mapping4[2] + 4), - (byte)(mapping4[3] + 4), + case 3 when (targetFormat.BytesPerPixel == 4): + ConvertWiden3To4Bytes(source, target, sourceFormat, targetFormat); + break; - (byte)(mapping4[0] + 8), - (byte)(mapping4[1] + 8), - (byte)(mapping4[2] + 8), - (byte)(mapping4[3] + 8), - - (byte)(mapping4[0] + 12), - (byte)(mapping4[1] + 12), - (byte)(mapping4[2] + 12), - (byte)(mapping4[3] + 12), - ]; - - ConvertEqualBpp(data, mask4, 4); + case 4 when (targetFormat.BytesPerPixel == 3): + ConvertNarrow4To3Bytes(source, target, sourceFormat, targetFormat); break; default: @@ -118,50 +92,280 @@ public static unsafe partial class PixelHelper } } - // DarthAffe 07.07.2024: No fallback-implementation here. Shuffle Requires only Ssse3 which should be supported nearly anywhere and if not the fallback of Vector128.Shuffle is perfectly fine. - private static void ConvertEqualBpp(Span data, ReadOnlySpan mask, int bpp) + private static void Convert3Bytes(ReadOnlySpan source, Span target, IColorFormat sourceFormat, IColorFormat targetFormat) + { + ReadOnlySpan sourceMapping = sourceFormat.ByteMapping; + ReadOnlySpan targetMapping = targetFormat.ByteMapping; + + ReadOnlySpan mapping = [sourceMapping[targetMapping[0]], sourceMapping[targetMapping[1]], sourceMapping[targetMapping[2]]]; + ReadOnlySpan mask = + [ + mapping[0], + mapping[1], + mapping[2], + + (byte)(mapping[0] + 3), + (byte)(mapping[1] + 3), + (byte)(mapping[2] + 3), + + (byte)(mapping[0] + 6), + (byte)(mapping[1] + 6), + (byte)(mapping[2] + 6), + + (byte)(mapping[0] + 9), + (byte)(mapping[1] + 9), + (byte)(mapping[2] + 9), + + (byte)(mapping[0] + 12), + (byte)(mapping[1] + 12), + (byte)(mapping[2] + 12), + + 15 + ]; + + ConvertSameBpp(source, target, mask, 3); + } + + private static void Convert4Bytes(ReadOnlySpan source, Span target, IColorFormat sourceFormat, IColorFormat targetFormat) + { + ReadOnlySpan sourceMapping = sourceFormat.ByteMapping; + ReadOnlySpan targetMapping = targetFormat.ByteMapping; + + ReadOnlySpan mapping = [sourceMapping[targetMapping[0]], sourceMapping[targetMapping[1]], sourceMapping[targetMapping[2]], sourceMapping[targetMapping[3]]]; + ReadOnlySpan mask = + [ + mapping[0], + mapping[1], + mapping[2], + mapping[3], + + (byte)(mapping[0] + 4), + (byte)(mapping[1] + 4), + (byte)(mapping[2] + 4), + (byte)(mapping[3] + 4), + + (byte)(mapping[0] + 8), + (byte)(mapping[1] + 8), + (byte)(mapping[2] + 8), + (byte)(mapping[3] + 8), + + (byte)(mapping[0] + 12), + (byte)(mapping[1] + 12), + (byte)(mapping[2] + 12), + (byte)(mapping[3] + 12), + ]; + + ConvertSameBpp(source, target, mask, 4); + } + + private static void ConvertSameBpp(ReadOnlySpan source, Span target, ReadOnlySpan mask, int bpp) { int elementsPerVector = Vector128.Count / bpp; int bytesPerVector = elementsPerVector * bpp; - int chunks = data.Length / bytesPerVector; + int chunks = source.Length / bytesPerVector; Vector128 maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask)); - int missingElements = (data.Length - (chunks * bytesPerVector)) / bpp; + int missingElements = (source.Length - (chunks * bytesPerVector)) / bpp; - fixed (byte* dataPtr = data) + fixed (byte* sourcePtr = source) + fixed (byte* targetPtr = target) { - byte* ptr = dataPtr; + byte* src = sourcePtr; + byte* tar = targetPtr; for (int i = 0; i < chunks; i++) { - Vector128 vector = Vector128.Load(ptr); - Vector128.Shuffle(vector, maskVector).Store(ptr); + Vector128 vector = Vector128.Load(src); + Vector128.Shuffle(vector, maskVector).Store(tar); - ptr += bytesPerVector; + src += bytesPerVector; + tar += bytesPerVector; } - Span buffer = stackalloc byte[missingElements * bpp]; // DarthAffe 07.07.2024: This is fine as it's always < 16 bytes - for (int i = 0; i < missingElements; i++) - { - int elementIndex = i * buffer.Length; - for (int j = 0; j < buffer.Length; j++) - buffer[elementIndex + j] = ptr[elementIndex + mask[j]]; - } + Span buffer = stackalloc byte[missingElements * bpp]; // DarthAffe 08.07.2024: This is fine as it's always < 16 bytes + for (int j = 0; j < buffer.Length; j++) + buffer[j] = src[mask[j]]; - buffer.CopyTo(new Span(ptr, buffer.Length)); + buffer.CopyTo(new Span(tar, buffer.Length)); } } - private static void ConvertWiden3To4Bytes(Span data, IColorFormat sourceFormat, IColorFormat targetFormat) + private static void ConvertWiden3To4Bytes(ReadOnlySpan source, Span target, IColorFormat sourceFormat, IColorFormat targetFormat) { - throw new NotImplementedException(); + ReadOnlySpan sourceMapping = sourceFormat.ByteMapping; + ReadOnlySpan targetMapping = targetFormat.ByteMapping; + + // DarthAffe 08.07.2024: For now alpha is the only thing to be added + Span isAlpha = + [ + targetMapping[0] == Color.A ? byte.MaxValue : (byte)0, + targetMapping[1] == Color.A ? byte.MaxValue : (byte)0, + targetMapping[2] == Color.A ? byte.MaxValue : (byte)0, + targetMapping[3] == Color.A ? byte.MaxValue : (byte)0, + ]; + + ReadOnlySpan mapping = + [ + isAlpha[0] > 0 ? (byte)0 : sourceMapping[targetMapping[0]], + isAlpha[1] > 0 ? (byte)0 : sourceMapping[targetMapping[1]], + isAlpha[2] > 0 ? (byte)0 : sourceMapping[targetMapping[2]], + isAlpha[3] > 0 ? (byte)0 : sourceMapping[targetMapping[3]] + ]; + + ReadOnlySpan mask = + [ + mapping[0], + mapping[1], + mapping[2], + mapping[3], + + (byte)(mapping[0] + 3), + (byte)(mapping[1] + 3), + (byte)(mapping[2] + 3), + (byte)(mapping[3] + 3), + + (byte)(mapping[0] + 6), + (byte)(mapping[1] + 6), + (byte)(mapping[2] + 6), + (byte)(mapping[3] + 6), + + (byte)(mapping[0] + 9), + (byte)(mapping[1] + 9), + (byte)(mapping[2] + 9), + (byte)(mapping[3] + 9), + ]; + + ReadOnlySpan alphaMask = + [ + isAlpha[0], + isAlpha[1], + isAlpha[2], + isAlpha[3], + + isAlpha[0], + isAlpha[1], + isAlpha[2], + isAlpha[3], + + isAlpha[0], + isAlpha[1], + isAlpha[2], + isAlpha[3], + + isAlpha[0], + isAlpha[1], + isAlpha[2], + isAlpha[3], + ]; + + int sourceBpp = sourceFormat.BytesPerPixel; + int targetBpp = targetFormat.BytesPerPixel; + + int targetElementsPerVector = Vector128.Count / targetBpp; + int targetBytesPerVector = targetElementsPerVector * targetBpp; + int sourceBytesPerVector = targetElementsPerVector * sourceBpp; + + int chunks = (source.Length / sourceBytesPerVector); + Vector128 maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask)); + Vector128 alphaMaskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(alphaMask)); + + int missingElements = (source.Length - (chunks * sourceBytesPerVector)) / sourceBpp; + + fixed (byte* sourcePtr = source) + fixed (byte* targetPtr = target) + { + byte* src = sourcePtr; + byte* tar = targetPtr; + + for (int i = 0; i < chunks; i++) + { + Vector128 vector = Vector128.Load(src); + Vector128 shuffled = Vector128.Shuffle(vector, maskVector); + Vector128.BitwiseOr(shuffled, alphaMaskVector).Store(tar); + + src += sourceBytesPerVector; + tar += targetBytesPerVector; + } + + Span buffer = stackalloc byte[missingElements * targetBpp]; // DarthAffe 08.07.2024: This is fine as it's always < 16 bytes + for (int i = 0; i < missingElements; i++) + for (int j = 0; j < targetBpp; j++) + buffer[(i * targetBpp) + j] = Math.Max(isAlpha[j], src[(i * sourceBpp) + mask[j]]); + + buffer.CopyTo(new Span(tar, buffer.Length)); + } } - private static void ConvertNarrow4To3Bytes(Span data, IColorFormat sourceFormat, IColorFormat targetFormat) + private static void ConvertNarrow4To3Bytes(ReadOnlySpan source, Span target, IColorFormat sourceFormat, IColorFormat targetFormat) { - throw new NotImplementedException(); + ReadOnlySpan sourceMapping = sourceFormat.ByteMapping; + ReadOnlySpan targetMapping = targetFormat.ByteMapping; + + // DarthAffe 08.07.2024: For now alpha is the only thing to be narrowed away + ReadOnlySpan mapping = [sourceMapping[targetMapping[0]], sourceMapping[targetMapping[1]], sourceMapping[targetMapping[2]]]; + + ReadOnlySpan mask = + [ + mapping[0], + mapping[1], + mapping[2], + + (byte)(mapping[0] + 4), + (byte)(mapping[1] + 4), + (byte)(mapping[2] + 4), + + (byte)(mapping[0] + 8), + (byte)(mapping[1] + 8), + (byte)(mapping[2] + 8), + + (byte)(mapping[0] + 12), + (byte)(mapping[1] + 12), + (byte)(mapping[2] + 12), + + 12, + 13, + 14, + 15 + ]; + + int sourceBpp = sourceFormat.BytesPerPixel; + int targetBpp = targetFormat.BytesPerPixel; + + int sourceElementsPerVector = Vector128.Count / sourceBpp; + int sourceBytesPerVector = sourceElementsPerVector * sourceBpp; + int targetBytesPerVector = sourceElementsPerVector * targetBpp; + + int chunks = (source.Length / sourceBytesPerVector) - 1; // DarthAffe 08.07.2024: -1 since we don't have enough space to copy a full target vector for the last set + Vector128 maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask)); + + int missingElements = (source.Length - (chunks * sourceBytesPerVector)) / sourceBpp; + + fixed (byte* sourcePtr = source) + fixed (byte* targetPtr = target) + { + byte* src = sourcePtr; + byte* tar = targetPtr; + + for (int i = 0; i < chunks; i++) + { + Vector128 vector = Vector128.Load(src); + Vector128.Shuffle(vector, maskVector).Store(tar); + + src += sourceBytesPerVector; + tar += targetBytesPerVector; + } + + Span buffer = stackalloc byte[missingElements * targetBpp]; // DarthAffe 08.07.2024: This is fine as it's always < 24 bytes + for (int i = 0; i < missingElements; i++) + for (int j = 0; j < targetBpp; j++) + buffer[(i * targetBpp) + j] = src[(i * sourceBpp) + mask[j]]; + + buffer.CopyTo(new Span(tar, buffer.Length)); + } } #endregion + + #endregion }