|
|
@@ -1,6 +1,6 @@
|
|
|
using Ryujinx.Common;
|
|
|
using System;
|
|
|
-
|
|
|
+using System.Runtime.Intrinsics;
|
|
|
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
|
|
|
|
|
|
namespace Ryujinx.Graphics.Texture
|
|
|
@@ -64,11 +64,14 @@ namespace Ryujinx.Graphics.Texture
|
|
|
}
|
|
|
|
|
|
int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
|
|
|
+ int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);
|
|
|
|
|
|
int xStart = strideTrunc / bytesPerPixel;
|
|
|
|
|
|
int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
|
|
|
|
|
+ int outStrideGap = stride - w * bytesPerPixel;
|
|
|
+
|
|
|
int alignment = gobWidth;
|
|
|
|
|
|
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
|
|
|
@@ -86,36 +89,74 @@ namespace Ryujinx.Graphics.Texture
|
|
|
mipGobBlocksInZ,
|
|
|
bytesPerPixel);
|
|
|
|
|
|
- for (int layer = 0; layer < layers; layer++)
|
|
|
+ unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
|
|
{
|
|
|
- int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
|
|
|
-
|
|
|
- for (int z = 0; z < d; z++)
|
|
|
- for (int y = 0; y < h; y++)
|
|
|
+ fixed (byte* outputPtr = output, dataPtr = data)
|
|
|
{
|
|
|
- for (int x = 0; x < strideTrunc; x += 16)
|
|
|
+ byte* outPtr = outputPtr + outOffs;
|
|
|
+ for (int layer = 0; layer < layers; layer++)
|
|
|
{
|
|
|
- int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x, y, z);
|
|
|
-
|
|
|
- Span<byte> dest = output.Slice(outOffs + x, 16);
|
|
|
-
|
|
|
- data.Slice(offset, 16).CopyTo(dest);
|
|
|
- }
|
|
|
-
|
|
|
- for (int x = xStart; x < w; x++)
|
|
|
- {
|
|
|
- int offset = inBaseOffset + layoutConverter.GetOffset(x, y, z);
|
|
|
-
|
|
|
- Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel);
|
|
|
-
|
|
|
- data.Slice(offset, bytesPerPixel).CopyTo(dest);
|
|
|
+ byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
|
|
|
+
|
|
|
+ for (int z = 0; z < d; z++)
|
|
|
+ {
|
|
|
+ layoutConverter.SetZ(z);
|
|
|
+ for (int y = 0; y < h; y++)
|
|
|
+ {
|
|
|
+ layoutConverter.SetY(y);
|
|
|
+
|
|
|
+ for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
|
|
|
+ {
|
|
|
+ byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
|
|
|
+ byte* offset2 = offset + 0x20;
|
|
|
+ byte* offset3 = offset + 0x100;
|
|
|
+ byte* offset4 = offset + 0x120;
|
|
|
+
|
|
|
+ Vector128<byte> value = *(Vector128<byte>*)offset;
|
|
|
+ Vector128<byte> value2 = *(Vector128<byte>*)offset2;
|
|
|
+ Vector128<byte> value3 = *(Vector128<byte>*)offset3;
|
|
|
+ Vector128<byte> value4 = *(Vector128<byte>*)offset4;
|
|
|
+
|
|
|
+ *(Vector128<byte>*)outPtr = value;
|
|
|
+ *(Vector128<byte>*)(outPtr + 16) = value2;
|
|
|
+ *(Vector128<byte>*)(outPtr + 32) = value3;
|
|
|
+ *(Vector128<byte>*)(outPtr + 48) = value4;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
|
|
|
+ {
|
|
|
+ byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
|
|
|
+
|
|
|
+ *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
|
|
|
+ {
|
|
|
+ byte* offset = inBaseOffset + layoutConverter.GetOffset(x);
|
|
|
+
|
|
|
+ *(T*)outPtr = *(T*)offset;
|
|
|
+ }
|
|
|
+
|
|
|
+ outPtr += outStrideGap;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
-
|
|
|
- outOffs += stride;
|
|
|
+ outOffs += stride * h * d * layers;
|
|
|
}
|
|
|
+ return true;
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
+ bool _ = bytesPerPixel switch
|
|
|
+ {
|
|
|
+ 1 => Convert<byte>(output, data),
|
|
|
+ 2 => Convert<ushort>(output, data),
|
|
|
+ 4 => Convert<uint>(output, data),
|
|
|
+ 8 => Convert<ulong>(output, data),
|
|
|
+ 12 => Convert<Bpp12Pixel>(output, data),
|
|
|
+ 16 => Convert<Vector128<byte>>(output, data),
|
|
|
+ _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
|
|
|
+ };
|
|
|
+ }
|
|
|
return output;
|
|
|
}
|
|
|
|
|
|
@@ -132,22 +173,18 @@ namespace Ryujinx.Graphics.Texture
|
|
|
int h = BitUtils.DivRoundUp(height, blockHeight);
|
|
|
|
|
|
int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
|
|
+ int lineSize = w * bytesPerPixel;
|
|
|
|
|
|
Span<byte> output = new byte[h * outStride];
|
|
|
|
|
|
int outOffs = 0;
|
|
|
+ int inOffs = 0;
|
|
|
|
|
|
for (int y = 0; y < h; y++)
|
|
|
{
|
|
|
- for (int x = 0; x < w; x++)
|
|
|
- {
|
|
|
- int offset = y * stride + x * bytesPerPixel;
|
|
|
-
|
|
|
- Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel);
|
|
|
-
|
|
|
- data.Slice(offset, bytesPerPixel).CopyTo(dest);
|
|
|
- }
|
|
|
+ data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
|
|
|
|
|
|
+ inOffs += stride;
|
|
|
outOffs += outStride;
|
|
|
}
|
|
|
|
|
|
@@ -198,8 +235,15 @@ namespace Ryujinx.Graphics.Texture
|
|
|
mipGobBlocksInZ >>= 1;
|
|
|
}
|
|
|
|
|
|
+ int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
|
|
|
+ int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);
|
|
|
+
|
|
|
+ int xStart = strideTrunc / bytesPerPixel;
|
|
|
+
|
|
|
int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
|
|
|
|
|
+ int inStrideGap = stride - w * bytesPerPixel;
|
|
|
+
|
|
|
int alignment = gobWidth;
|
|
|
|
|
|
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
|
|
|
@@ -217,25 +261,73 @@ namespace Ryujinx.Graphics.Texture
|
|
|
mipGobBlocksInZ,
|
|
|
bytesPerPixel);
|
|
|
|
|
|
- for (int layer = 0; layer < layers; layer++)
|
|
|
+ unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
|
|
|
{
|
|
|
- int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);
|
|
|
-
|
|
|
- for (int z = 0; z < d; z++)
|
|
|
- for (int y = 0; y < h; y++)
|
|
|
+ fixed (byte* outputPtr = output, dataPtr = data)
|
|
|
{
|
|
|
- for (int x = 0; x < w; x++)
|
|
|
+ byte* inPtr = dataPtr + inOffs;
|
|
|
+ for (int layer = 0; layer < layers; layer++)
|
|
|
{
|
|
|
- int offset = outBaseOffset + layoutConverter.GetOffset(x, y, z);
|
|
|
-
|
|
|
- Span<byte> dest = output.Slice(offset, bytesPerPixel);
|
|
|
-
|
|
|
- data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest);
|
|
|
+ byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
|
|
|
+
|
|
|
+ for (int z = 0; z < d; z++)
|
|
|
+ {
|
|
|
+ layoutConverter.SetZ(z);
|
|
|
+ for (int y = 0; y < h; y++)
|
|
|
+ {
|
|
|
+ layoutConverter.SetY(y);
|
|
|
+
|
|
|
+ for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
|
|
|
+ {
|
|
|
+ byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
|
|
|
+ byte* offset2 = offset + 0x20;
|
|
|
+ byte* offset3 = offset + 0x100;
|
|
|
+ byte* offset4 = offset + 0x120;
|
|
|
+
|
|
|
+ Vector128<byte> value = *(Vector128<byte>*)inPtr;
|
|
|
+ Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
|
|
|
+ Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
|
|
|
+ Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);
|
|
|
+
|
|
|
+ *(Vector128<byte>*)offset = value;
|
|
|
+ *(Vector128<byte>*)offset2 = value2;
|
|
|
+ *(Vector128<byte>*)offset3 = value3;
|
|
|
+ *(Vector128<byte>*)offset4 = value4;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
|
|
|
+ {
|
|
|
+ byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
|
|
|
+
|
|
|
+ *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (int x = xStart; x < w; x++, inPtr += bytesPerPixel)
|
|
|
+ {
|
|
|
+ byte* offset = outBaseOffset + layoutConverter.GetOffset(x);
|
|
|
+
|
|
|
+ *(T*)offset = *(T*)inPtr;
|
|
|
+ }
|
|
|
+
|
|
|
+ inPtr += inStrideGap;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
-
|
|
|
- inOffs += stride;
|
|
|
+ inOffs += stride * h * d * layers;
|
|
|
}
|
|
|
+ return true;
|
|
|
}
|
|
|
+
|
|
|
+ bool _ = bytesPerPixel switch
|
|
|
+ {
|
|
|
+ 1 => Convert<byte>(output, data),
|
|
|
+ 2 => Convert<ushort>(output, data),
|
|
|
+ 4 => Convert<uint>(output, data),
|
|
|
+ 8 => Convert<ulong>(output, data),
|
|
|
+ 12 => Convert<Bpp12Pixel>(output, data),
|
|
|
+ 16 => Convert<Vector128<byte>>(output, data),
|
|
|
+ _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
|
|
|
+ };
|
|
|
}
|
|
|
|
|
|
return output;
|
|
|
@@ -254,23 +346,19 @@ namespace Ryujinx.Graphics.Texture
|
|
|
int h = BitUtils.DivRoundUp(height, blockHeight);
|
|
|
|
|
|
int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);
|
|
|
+ int lineSize = width * bytesPerPixel;
|
|
|
|
|
|
Span<byte> output = new byte[h * stride];
|
|
|
|
|
|
int inOffs = 0;
|
|
|
+ int outOffs = 0;
|
|
|
|
|
|
for (int y = 0; y < h; y++)
|
|
|
{
|
|
|
- for (int x = 0; x < w; x++)
|
|
|
- {
|
|
|
- int offset = y * stride + x * bytesPerPixel;
|
|
|
-
|
|
|
- Span<byte> dest = output.Slice(offset, bytesPerPixel);
|
|
|
-
|
|
|
- data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest);
|
|
|
- }
|
|
|
+ data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));
|
|
|
|
|
|
inOffs += inStride;
|
|
|
+ outOffs += stride;
|
|
|
}
|
|
|
|
|
|
return output;
|