mirror of
https://github.com/DarthAffe/HPPH.git
synced 2025-12-12 13:28:37 +00:00
Merge pull request #1 from DarthAffe/ParallelConversion
Parallel conversion
This commit is contained in:
commit
b9aa24aeec
@ -5,6 +5,12 @@ namespace HPPH;
|
||||
|
||||
public static unsafe partial class PixelHelper
|
||||
{
|
||||
#region Constants
|
||||
|
||||
private const int MIN_BATCH_SIZE = 8;
|
||||
|
||||
#endregion
|
||||
|
||||
#region Methods
|
||||
|
||||
public static Span<TTarget> ConvertInPlace<TSource, TTarget>(this Span<TSource> colors)
|
||||
@ -105,6 +111,8 @@ public static unsafe partial class PixelHelper
|
||||
|
||||
private static void Convert3Bytes(ReadOnlySpan<byte> source, Span<byte> target, IColorFormat sourceFormat, IColorFormat targetFormat)
|
||||
{
|
||||
const int BPP = 3;
|
||||
|
||||
ReadOnlySpan<byte> sourceMapping = sourceFormat.ByteMapping;
|
||||
ReadOnlySpan<byte> targetMapping = targetFormat.ByteMapping;
|
||||
|
||||
@ -133,12 +141,97 @@ public static unsafe partial class PixelHelper
|
||||
|
||||
15
|
||||
];
|
||||
Vector128<byte> maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask));
|
||||
|
||||
ConvertSameBpp(source, target, mask, 3);
|
||||
int elements = source.Length / BPP;
|
||||
int elementsPerVector = Vector128<byte>.Count / BPP;
|
||||
int bytesPerVector = elementsPerVector * BPP;
|
||||
|
||||
int chunks = elements / elementsPerVector;
|
||||
int batches = Math.Max(1, Math.Min(chunks / MIN_BATCH_SIZE, Environment.ProcessorCount));
|
||||
int batchSize = elements / batches;
|
||||
|
||||
fixed (byte* fixedSourcePtr = source)
|
||||
fixed (byte* fixedTargetPtr = target)
|
||||
{
|
||||
byte* sourcePtr = fixedSourcePtr;
|
||||
byte* targetPtr = fixedTargetPtr;
|
||||
|
||||
if (batches == 1)
|
||||
{
|
||||
byte* src = sourcePtr;
|
||||
byte* tar = targetPtr;
|
||||
|
||||
int chunkCount = Math.Max(0, (batchSize / elementsPerVector) - 1);
|
||||
int missingElements = batchSize - (chunkCount * elementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128.Shuffle(vector, maskVector).Store(tar);
|
||||
|
||||
src += bytesPerVector;
|
||||
tar += bytesPerVector;
|
||||
}
|
||||
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
{
|
||||
tar[(i * BPP) + 0] = src[(i * BPP) + maskVector[0]];
|
||||
tar[(i * BPP) + 1] = src[(i * BPP) + maskVector[1]];
|
||||
tar[(i * BPP) + 2] = src[(i * BPP) + maskVector[2]];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Parallel.For(0, batches, Process);
|
||||
|
||||
int missing = elements - (batchSize * batches);
|
||||
if (missing > 0)
|
||||
{
|
||||
byte* missingSrc = sourcePtr + (batches * batchSize * BPP);
|
||||
byte* missingTar = targetPtr + (batches * batchSize * BPP);
|
||||
|
||||
for (int i = 0; i < missing; i++)
|
||||
{
|
||||
missingTar[(i * BPP) + 0] = missingSrc[(i * BPP) + maskVector[0]];
|
||||
missingTar[(i * BPP) + 1] = missingSrc[(i * BPP) + maskVector[1]];
|
||||
missingTar[(i * BPP) + 2] = missingSrc[(i * BPP) + maskVector[2]];
|
||||
}
|
||||
}
|
||||
|
||||
void Process(int index)
|
||||
{
|
||||
int offset = index * batchSize;
|
||||
byte* src = sourcePtr + (offset * BPP);
|
||||
byte* tar = targetPtr + (offset * BPP);
|
||||
|
||||
int chunkCount = Math.Max(0, (batchSize / elementsPerVector) - 1);
|
||||
int missingElements = batchSize - (chunkCount * elementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128.Shuffle(vector, maskVector).Store(tar);
|
||||
|
||||
src += bytesPerVector;
|
||||
tar += bytesPerVector;
|
||||
}
|
||||
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
{
|
||||
tar[(i * BPP) + 0] = src[(i * BPP) + maskVector[0]];
|
||||
tar[(i * BPP) + 1] = src[(i * BPP) + maskVector[1]];
|
||||
tar[(i * BPP) + 2] = src[(i * BPP) + maskVector[2]];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void Convert4Bytes(ReadOnlySpan<byte> source, Span<byte> target, IColorFormat sourceFormat, IColorFormat targetFormat)
|
||||
{
|
||||
const int BPP = 4;
|
||||
|
||||
ReadOnlySpan<byte> sourceMapping = sourceFormat.ByteMapping;
|
||||
ReadOnlySpan<byte> targetMapping = targetFormat.ByteMapping;
|
||||
|
||||
@ -166,26 +259,31 @@ public static unsafe partial class PixelHelper
|
||||
(byte)(mapping[3] + 12),
|
||||
];
|
||||
|
||||
ConvertSameBpp(source, target, mask, 4);
|
||||
}
|
||||
|
||||
private static void ConvertSameBpp(ReadOnlySpan<byte> source, Span<byte> target, ReadOnlySpan<byte> mask, int bpp)
|
||||
{
|
||||
int elementsPerVector = Vector128<byte>.Count / bpp;
|
||||
int bytesPerVector = elementsPerVector * bpp;
|
||||
|
||||
int chunks = source.Length / bytesPerVector;
|
||||
Vector128<byte> maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask));
|
||||
|
||||
int missingElements = (source.Length - (chunks * bytesPerVector)) / bpp;
|
||||
int elements = source.Length / BPP;
|
||||
int elementsPerVector = Vector128<byte>.Count / BPP;
|
||||
int bytesPerVector = elementsPerVector * BPP;
|
||||
|
||||
fixed (byte* sourcePtr = source)
|
||||
fixed (byte* targetPtr = target)
|
||||
int chunks = elements / elementsPerVector;
|
||||
int batches = Math.Max(1, Math.Min(chunks / MIN_BATCH_SIZE, Environment.ProcessorCount));
|
||||
int batchSize = elements / batches;
|
||||
|
||||
fixed (byte* fixedSourcePtr = source)
|
||||
fixed (byte* fixedTargetPtr = target)
|
||||
{
|
||||
byte* sourcePtr = fixedSourcePtr;
|
||||
byte* targetPtr = fixedTargetPtr;
|
||||
|
||||
if (batches == 1)
|
||||
{
|
||||
byte* src = sourcePtr;
|
||||
byte* tar = targetPtr;
|
||||
|
||||
for (int i = 0; i < chunks; i++)
|
||||
int chunkCount = batchSize / elementsPerVector;
|
||||
int missingElements = batchSize - (chunkCount * elementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128.Shuffle(vector, maskVector).Store(tar);
|
||||
@ -194,21 +292,73 @@ public static unsafe partial class PixelHelper
|
||||
tar += bytesPerVector;
|
||||
}
|
||||
|
||||
Span<byte> buffer = stackalloc byte[missingElements * bpp]; // DarthAffe 08.07.2024: This is fine as it's always < 16 bytes
|
||||
for (int j = 0; j < buffer.Length; j++)
|
||||
buffer[j] = src[mask[j]];
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
{
|
||||
tar[(i * BPP) + 0] = src[(i * BPP) + maskVector[0]];
|
||||
tar[(i * BPP) + 1] = src[(i * BPP) + maskVector[1]];
|
||||
tar[(i * BPP) + 2] = src[(i * BPP) + maskVector[2]];
|
||||
tar[(i * BPP) + 3] = src[(i * BPP) + maskVector[3]];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Parallel.For(0, batches, Process);
|
||||
|
||||
buffer.CopyTo(new Span<byte>(tar, buffer.Length));
|
||||
int missing = elements - (batchSize * batches);
|
||||
if (missing > 0)
|
||||
{
|
||||
byte* missingSrc = sourcePtr + (batches * batchSize * BPP);
|
||||
byte* missingTar = targetPtr + (batches * batchSize * BPP);
|
||||
|
||||
for (int i = 0; i < missing; i++)
|
||||
{
|
||||
missingTar[(i * BPP) + 0] = missingSrc[(i * BPP) + maskVector[0]];
|
||||
missingTar[(i * BPP) + 1] = missingSrc[(i * BPP) + maskVector[1]];
|
||||
missingTar[(i * BPP) + 2] = missingSrc[(i * BPP) + maskVector[2]];
|
||||
missingTar[(i * BPP) + 3] = missingSrc[(i * BPP) + maskVector[3]];
|
||||
}
|
||||
}
|
||||
|
||||
void Process(int index)
|
||||
{
|
||||
int offset = index * batchSize;
|
||||
byte* src = sourcePtr + (offset * BPP);
|
||||
byte* tar = targetPtr + (offset * BPP);
|
||||
|
||||
int chunkCount = batchSize / elementsPerVector;
|
||||
int missingElements = batchSize - (chunkCount * elementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128.Shuffle(vector, maskVector).Store(tar);
|
||||
|
||||
src += bytesPerVector;
|
||||
tar += bytesPerVector;
|
||||
}
|
||||
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
{
|
||||
tar[(i * BPP) + 0] = src[(i * BPP) + maskVector[0]];
|
||||
tar[(i * BPP) + 1] = src[(i * BPP) + maskVector[1]];
|
||||
tar[(i * BPP) + 2] = src[(i * BPP) + maskVector[2]];
|
||||
tar[(i * BPP) + 3] = src[(i * BPP) + maskVector[3]];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void ConvertWiden3To4Bytes(ReadOnlySpan<byte> source, Span<byte> target, IColorFormat sourceFormat, IColorFormat targetFormat)
|
||||
{
|
||||
const int SOURCE_BPP = 3;
|
||||
const int TARGET_BPP = 4;
|
||||
|
||||
ReadOnlySpan<byte> sourceMapping = sourceFormat.ByteMapping;
|
||||
ReadOnlySpan<byte> targetMapping = targetFormat.ByteMapping;
|
||||
|
||||
// DarthAffe 08.07.2024: For now alpha is the only thing to be added
|
||||
Span<byte> isAlpha =
|
||||
byte[] isAlpha =
|
||||
[
|
||||
targetMapping[0] == Color.A ? byte.MaxValue : (byte)0,
|
||||
targetMapping[1] == Color.A ? byte.MaxValue : (byte)0,
|
||||
@ -270,26 +420,33 @@ public static unsafe partial class PixelHelper
|
||||
isAlpha[3],
|
||||
];
|
||||
|
||||
int sourceBpp = sourceFormat.BytesPerPixel;
|
||||
int targetBpp = targetFormat.BytesPerPixel;
|
||||
|
||||
int targetElementsPerVector = Vector128<byte>.Count / targetBpp;
|
||||
int targetBytesPerVector = targetElementsPerVector * targetBpp;
|
||||
int sourceBytesPerVector = targetElementsPerVector * sourceBpp;
|
||||
|
||||
int chunks = (source.Length / sourceBytesPerVector);
|
||||
Vector128<byte> maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask));
|
||||
Vector128<byte> alphaMaskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(alphaMask));
|
||||
|
||||
int missingElements = (source.Length - (chunks * sourceBytesPerVector)) / sourceBpp;
|
||||
int elements = source.Length / SOURCE_BPP;
|
||||
int targetElementsPerVector = Vector128<byte>.Count / TARGET_BPP;
|
||||
int sourceBytesPerVector = targetElementsPerVector * SOURCE_BPP;
|
||||
int targetBytesPerVector = targetElementsPerVector * TARGET_BPP;
|
||||
|
||||
fixed (byte* sourcePtr = source)
|
||||
fixed (byte* targetPtr = target)
|
||||
int chunks = elements / targetElementsPerVector;
|
||||
int batches = Math.Max(1, Math.Min(chunks / MIN_BATCH_SIZE, Environment.ProcessorCount));
|
||||
int batchSize = elements / batches;
|
||||
|
||||
fixed (byte* fixedSourcePtr = source)
|
||||
fixed (byte* fixedTargetPtr = target)
|
||||
{
|
||||
byte* sourcePtr = fixedSourcePtr;
|
||||
byte* targetPtr = fixedTargetPtr;
|
||||
|
||||
if (batches == 1)
|
||||
{
|
||||
byte* src = sourcePtr;
|
||||
byte* tar = targetPtr;
|
||||
|
||||
for (int i = 0; i < chunks; i++)
|
||||
int chunkCount = batchSize / targetElementsPerVector;
|
||||
int missingElements = batchSize - (chunkCount * targetElementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128<byte> shuffled = Vector128.Shuffle(vector, maskVector);
|
||||
@ -299,17 +456,69 @@ public static unsafe partial class PixelHelper
|
||||
tar += targetBytesPerVector;
|
||||
}
|
||||
|
||||
Span<byte> buffer = stackalloc byte[missingElements * targetBpp]; // DarthAffe 08.07.2024: This is fine as it's always < 16 bytes
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
for (int j = 0; j < targetBpp; j++)
|
||||
buffer[(i * targetBpp) + j] = Math.Max(isAlpha[j], src[(i * sourceBpp) + mask[j]]);
|
||||
{
|
||||
tar[(i * TARGET_BPP) + 0] = Math.Max(isAlpha[0], src[(i * SOURCE_BPP) + maskVector[0]]);
|
||||
tar[(i * TARGET_BPP) + 1] = Math.Max(isAlpha[1], src[(i * SOURCE_BPP) + maskVector[1]]);
|
||||
tar[(i * TARGET_BPP) + 2] = Math.Max(isAlpha[2], src[(i * SOURCE_BPP) + maskVector[2]]);
|
||||
tar[(i * TARGET_BPP) + 3] = Math.Max(isAlpha[3], src[(i * SOURCE_BPP) + maskVector[3]]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Parallel.For(0, batches, Process);
|
||||
|
||||
buffer.CopyTo(new Span<byte>(tar, buffer.Length));
|
||||
int missing = elements - (batchSize * batches);
|
||||
if (missing > 0)
|
||||
{
|
||||
byte* missingSrc = sourcePtr + (batches * batchSize * SOURCE_BPP);
|
||||
byte* missingTar = targetPtr + (batches * batchSize * TARGET_BPP);
|
||||
|
||||
for (int i = 0; i < missing; i++)
|
||||
{
|
||||
missingTar[(i * TARGET_BPP) + 0] = Math.Max(isAlpha[0], missingSrc[(i * SOURCE_BPP) + maskVector[0]]);
|
||||
missingTar[(i * TARGET_BPP) + 1] = Math.Max(isAlpha[1], missingSrc[(i * SOURCE_BPP) + maskVector[1]]);
|
||||
missingTar[(i * TARGET_BPP) + 2] = Math.Max(isAlpha[2], missingSrc[(i * SOURCE_BPP) + maskVector[2]]);
|
||||
missingTar[(i * TARGET_BPP) + 3] = Math.Max(isAlpha[3], missingSrc[(i * SOURCE_BPP) + maskVector[3]]);
|
||||
}
|
||||
}
|
||||
|
||||
void Process(int index)
|
||||
{
|
||||
int offset = index * batchSize;
|
||||
byte* src = sourcePtr + (offset * SOURCE_BPP);
|
||||
byte* tar = targetPtr + (offset * TARGET_BPP);
|
||||
|
||||
int chunkCount = batchSize / targetElementsPerVector;
|
||||
int missingElements = batchSize - (chunkCount * targetElementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128<byte> shuffled = Vector128.Shuffle(vector, maskVector);
|
||||
Vector128.BitwiseOr(shuffled, alphaMaskVector).Store(tar);
|
||||
|
||||
src += sourceBytesPerVector;
|
||||
tar += targetBytesPerVector;
|
||||
}
|
||||
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
{
|
||||
tar[(i * TARGET_BPP) + 0] = Math.Max(isAlpha[0], src[(i * SOURCE_BPP) + maskVector[0]]);
|
||||
tar[(i * TARGET_BPP) + 1] = Math.Max(isAlpha[1], src[(i * SOURCE_BPP) + maskVector[1]]);
|
||||
tar[(i * TARGET_BPP) + 2] = Math.Max(isAlpha[2], src[(i * SOURCE_BPP) + maskVector[2]]);
|
||||
tar[(i * TARGET_BPP) + 3] = Math.Max(isAlpha[3], src[(i * SOURCE_BPP) + maskVector[3]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void ConvertNarrow4To3Bytes(ReadOnlySpan<byte> source, Span<byte> target, IColorFormat sourceFormat, IColorFormat targetFormat)
|
||||
{
|
||||
const int SOURCE_BPP = 4;
|
||||
const int TARGET_BPP = 3;
|
||||
|
||||
ReadOnlySpan<byte> sourceMapping = sourceFormat.ByteMapping;
|
||||
ReadOnlySpan<byte> targetMapping = targetFormat.ByteMapping;
|
||||
|
||||
@ -340,25 +549,32 @@ public static unsafe partial class PixelHelper
|
||||
15
|
||||
];
|
||||
|
||||
int sourceBpp = sourceFormat.BytesPerPixel;
|
||||
int targetBpp = targetFormat.BytesPerPixel;
|
||||
|
||||
int sourceElementsPerVector = Vector128<byte>.Count / sourceBpp;
|
||||
int sourceBytesPerVector = sourceElementsPerVector * sourceBpp;
|
||||
int targetBytesPerVector = sourceElementsPerVector * targetBpp;
|
||||
|
||||
int chunks = (source.Length / sourceBytesPerVector) - 1; // DarthAffe 08.07.2024: -1 since we don't have enough space to copy a full target vector for the last set
|
||||
Vector128<byte> maskVector = Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(mask));
|
||||
|
||||
int missingElements = (source.Length - (chunks * sourceBytesPerVector)) / sourceBpp;
|
||||
int elements = source.Length / SOURCE_BPP;
|
||||
int sourceElementsPerVector = Vector128<byte>.Count / SOURCE_BPP;
|
||||
int sourceBytesPerVector = sourceElementsPerVector * SOURCE_BPP;
|
||||
int targetBytesPerVector = sourceElementsPerVector * TARGET_BPP;
|
||||
|
||||
fixed (byte* sourcePtr = source)
|
||||
fixed (byte* targetPtr = target)
|
||||
int chunks = elements / sourceElementsPerVector;
|
||||
int batches = Math.Max(1, Math.Min(chunks / MIN_BATCH_SIZE, Environment.ProcessorCount));
|
||||
int batchSize = elements / batches;
|
||||
|
||||
fixed (byte* fixedSourcePtr = source)
|
||||
fixed (byte* fixedTargetPtr = target)
|
||||
{
|
||||
byte* sourcePtr = fixedSourcePtr;
|
||||
byte* targetPtr = fixedTargetPtr;
|
||||
|
||||
if (batches == 1)
|
||||
{
|
||||
byte* src = sourcePtr;
|
||||
byte* tar = targetPtr;
|
||||
|
||||
for (int i = 0; i < chunks; i++)
|
||||
int chunkCount = Math.Max(0, (batchSize / sourceElementsPerVector) - 1); // DarthAffe 08.07.2024: -1 since we don't have enough space to copy a full target vector for the last set
|
||||
int missingElements = batchSize - (chunkCount * sourceElementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128.Shuffle(vector, maskVector).Store(tar);
|
||||
@ -367,12 +583,57 @@ public static unsafe partial class PixelHelper
|
||||
tar += targetBytesPerVector;
|
||||
}
|
||||
|
||||
Span<byte> buffer = stackalloc byte[missingElements * targetBpp]; // DarthAffe 08.07.2024: This is fine as it's always < 24 bytes
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
for (int j = 0; j < targetBpp; j++)
|
||||
buffer[(i * targetBpp) + j] = src[(i * sourceBpp) + mask[j]];
|
||||
{
|
||||
tar[(i * TARGET_BPP) + 0] = src[(i * SOURCE_BPP) + mapping[0]];
|
||||
tar[(i * TARGET_BPP) + 1] = src[(i * SOURCE_BPP) + mapping[1]];
|
||||
tar[(i * TARGET_BPP) + 2] = src[(i * SOURCE_BPP) + mapping[2]];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Parallel.For(0, batches, Process);
|
||||
|
||||
buffer.CopyTo(new Span<byte>(tar, buffer.Length));
|
||||
int missing = elements - (batchSize * batches);
|
||||
if (missing > 0)
|
||||
{
|
||||
byte* missingSrc = sourcePtr + (batches * batchSize * SOURCE_BPP);
|
||||
byte* missingTar = targetPtr + (batches * batchSize * TARGET_BPP);
|
||||
|
||||
for (int i = 0; i < missing; i++)
|
||||
{
|
||||
missingTar[(i * TARGET_BPP) + 0] = missingSrc[(i * SOURCE_BPP) + maskVector[0]];
|
||||
missingTar[(i * TARGET_BPP) + 1] = missingSrc[(i * SOURCE_BPP) + maskVector[1]];
|
||||
missingTar[(i * TARGET_BPP) + 2] = missingSrc[(i * SOURCE_BPP) + maskVector[2]];
|
||||
}
|
||||
}
|
||||
|
||||
void Process(int index)
|
||||
{
|
||||
int offset = index * batchSize;
|
||||
byte* src = sourcePtr + (offset * SOURCE_BPP);
|
||||
byte* tar = targetPtr + (offset * TARGET_BPP);
|
||||
|
||||
int chunkCount = Math.Max(0, (batchSize / sourceElementsPerVector) - 1); // DarthAffe 08.07.2024: -1 since we don't have enough space to copy a full target vector for the last set
|
||||
int missingElements = batchSize - (chunkCount * sourceElementsPerVector);
|
||||
|
||||
for (int i = 0; i < chunkCount; i++)
|
||||
{
|
||||
Vector128<byte> vector = Vector128.Load(src);
|
||||
Vector128.Shuffle(vector, maskVector).Store(tar);
|
||||
|
||||
src += sourceBytesPerVector;
|
||||
tar += targetBytesPerVector;
|
||||
}
|
||||
|
||||
for (int i = 0; i < missingElements; i++)
|
||||
{
|
||||
tar[(i * TARGET_BPP) + 0] = src[(i * SOURCE_BPP) + maskVector[0]];
|
||||
tar[(i * TARGET_BPP) + 1] = src[(i * SOURCE_BPP) + maskVector[1]];
|
||||
tar[(i * TARGET_BPP) + 2] = src[(i * SOURCE_BPP) + maskVector[2]];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
12
README.md
12
README.md
@ -98,8 +98,10 @@ All of the included formats can freely be converted between each other.
|
||||
Allocation-free in-place conversion is only supported between formats of the same size (both 24-bit or both 32-bit).
|
||||
|
||||
| Method | Mean | Error | StdDev | Allocated |
|
||||
|----------- |---------:|----------:|----------:|----------:|
|
||||
| RGBToBGR | 6.272 ms | 0.0288 ms | 0.0240 ms | 8.81 MB |
|
||||
| RGBToBGRA | 8.534 ms | 0.0684 ms | 0.0640 ms | 11.75 MB |
|
||||
| RGBAToABGR | 8.128 ms | 0.0927 ms | 0.0867 ms | 11.75 MB |
|
||||
| ARGBToBGR | 8.004 ms | 0.0353 ms | 0.0313 ms | 8.81 MB |
|
||||
|------------------- |---------:|----------:|----------:|------------:|
|
||||
| RGBToBGR | 1.487 ms | 0.0221 ms | 0.0196 ms | 9073.58 KB |
|
||||
| RGBToBGRA | 1.676 ms | 0.0330 ms | 0.0353 ms | 12064.76 KB |
|
||||
| RGBAToABGR | 1.766 ms | 0.0348 ms | 0.0476 ms | 12084.93 KB |
|
||||
| ARGBToBGR | 1.533 ms | 0.0072 ms | 0.0064 ms | 9085.36 KB |
|
||||
| RGBToBGR_InPlace | 1.025 ms | 0.0021 ms | 0.0017 ms | 34.47 KB |
|
||||
| RGBAToABGR_InPlace | 1.054 ms | 0.0023 ms | 0.0020 ms | 34.16 KB |
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user