|
@@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Vic.Image
|
|
|
case PixelFormat.A8B8G8R8:
|
|
case PixelFormat.A8B8G8R8:
|
|
|
WriteA8B8G8R8(rm, input, ref config, ref offsets);
|
|
WriteA8B8G8R8(rm, input, ref config, ref offsets);
|
|
|
break;
|
|
break;
|
|
|
|
|
+ case PixelFormat.A8R8G8B8:
|
|
|
|
|
+ WriteA8R8G8B8(rm, input, ref config, ref offsets);
|
|
|
|
|
+ break;
|
|
|
case PixelFormat.Y8___V8U8_N420:
|
|
case PixelFormat.Y8___V8U8_N420:
|
|
|
WriteNv12(rm, input, ref config, ref offsets);
|
|
WriteNv12(rm, input, ref config, ref offsets);
|
|
|
break;
|
|
break;
|
|
@@ -116,6 +119,105 @@ namespace Ryujinx.Graphics.Vic.Image
|
|
|
rm.BufferPool.Return(dstIndex);
|
|
rm.BufferPool.Return(dstIndex);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Converts the pixels of <paramref name="input"/> to one byte per channel, four bytes per pixel,
/// stored in B, G, R, A byte order, and hands the result to <c>WriteBuffer</c>.
/// </summary>
/// <param name="rm">Resource manager; supplies the buffer pool and is forwarded to <c>WriteBuffer</c></param>
/// <param name="input">Source surface whose <c>Width</c>, <c>Height</c> and pixel data are read</param>
/// <param name="config">Output surface configuration; <c>OutBlkKind</c> and <c>OutBlkHeight</c> are read</param>
/// <param name="offsets">Plane offsets; <c>LumaOffset</c> is passed to <c>WriteBuffer</c> as the destination offset</param>
private unsafe static void WriteA8R8G8B8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
{
    int width = input.Width;
    int height = input.Height;
    int stride = GetPitch(width, 4);

    int dstIndex = rm.BufferPool.Rent(height * stride, out Span<byte> dst);

    // Return the rented buffer even when the conversion or the write throws,
    // so a failure does not leave the pool entry rented.
    try
    {
        if (Ssse3.IsSupported)
        {
            // Swaps bytes 0 and 2 inside every 4-byte group and keeps bytes 1 and 3,
            // so the packed channels end up in the same B, G, R, A order the scalar loops write.
            Vector128<byte> shuffleMask = Vector128.Create(
                (byte)2, (byte)1, (byte)0, (byte)3,
                (byte)6, (byte)5, (byte)4, (byte)7,
                (byte)10, (byte)9, (byte)8, (byte)11,
                (byte)14, (byte)13, (byte)12, (byte)15);

            // Largest multiple of 8 not exceeding width: the vector loop handles 8 pixels per iteration.
            int widthTrunc = width & ~7;

            // Bytes left over at the end of each destination row after width * 4 bytes of pixels.
            int strideGap = stride - width * 4;

            fixed (Pixel* srcPtr = input.Data)
            {
                Pixel* ip = srcPtr;

                fixed (byte* dstPtr = dst)
                {
                    byte* op = dstPtr;

                    for (int y = 0; y < height; y++, ip += input.Width)
                    {
                        int x = 0;

                        for (; x < widthTrunc; x += 8)
                        {
                            // Each 128-bit load advances by two Pixel elements
                            // (presumably four 16-bit channels per pixel; confirm against the Pixel struct).
                            Vector128<ushort> pixel12 = Sse2.LoadVector128((ushort*)(ip + (uint)x));
                            Vector128<ushort> pixel34 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 2));
                            Vector128<ushort> pixel56 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 4));
                            Vector128<ushort> pixel78 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 6));

                            // Drop the two low bits of every channel.
                            // NOTE(review): assumed to mirror what Downsample does in the scalar loops; confirm.
                            pixel12 = Sse2.ShiftRightLogical(pixel12, 2);
                            pixel34 = Sse2.ShiftRightLogical(pixel34, 2);
                            pixel56 = Sse2.ShiftRightLogical(pixel56, 2);
                            pixel78 = Sse2.ShiftRightLogical(pixel78, 2);

                            // Narrow 16-bit channels to bytes (with unsigned saturation), four pixels per vector.
                            Vector128<byte> pixel1234 = Sse2.PackUnsignedSaturate(pixel12.AsInt16(), pixel34.AsInt16());
                            Vector128<byte> pixel5678 = Sse2.PackUnsignedSaturate(pixel56.AsInt16(), pixel78.AsInt16());

                            pixel1234 = Ssse3.Shuffle(pixel1234, shuffleMask);
                            pixel5678 = Ssse3.Shuffle(pixel5678, shuffleMask);

                            Sse2.Store(op + 0x00, pixel1234);
                            Sse2.Store(op + 0x10, pixel5678);

                            op += 0x20;
                        }

                        // Scalar tail for the remaining (width % 8) pixels of the row.
                        for (; x < width; x++)
                        {
                            Pixel* px = ip + (uint)x;

                            *(op + 0) = Downsample(px->B);
                            *(op + 1) = Downsample(px->G);
                            *(op + 2) = Downsample(px->R);
                            *(op + 3) = Downsample(px->A);

                            op += 4;
                        }

                        // Skip the row padding so the next row starts on a stride boundary.
                        op += strideGap;
                    }
                }
            }
        }
        else
        {
            // Fallback without SSSE3: same B, G, R, A output, one channel at a time.
            for (int y = 0; y < height; y++)
            {
                int baseOffs = y * stride;

                for (int x = 0; x < width; x++)
                {
                    int offs = baseOffs + x * 4;

                    dst[offs + 0] = Downsample(input.GetB(x, y));
                    dst[offs + 1] = Downsample(input.GetG(x, y));
                    dst[offs + 2] = Downsample(input.GetR(x, y));
                    dst[offs + 3] = Downsample(input.GetA(x, y));
                }
            }
        }

        bool outLinear = config.OutBlkKind == 0;

        int gobBlocksInY = 1 << config.OutBlkHeight;

        WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, 4, gobBlocksInY);
    }
    finally
    {
        rm.BufferPool.Return(dstIndex);
    }
}
|
|
|
|
|
+
|
|
|
private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
|
|
private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets)
|
|
|
{
|
|
{
|
|
|
int gobBlocksInY = 1 << config.OutBlkHeight;
|
|
int gobBlocksInY = 1 << config.OutBlkHeight;
|