SurfaceReader.cs 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. using Ryujinx.Common.Logging;
  2. using Ryujinx.Graphics.Gpu.Memory;
  3. using Ryujinx.Graphics.Texture;
  4. using Ryujinx.Graphics.Vic.Types;
  5. using System;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.Intrinsics;
  8. using System.Runtime.Intrinsics.X86;
  9. using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
  10. namespace Ryujinx.Graphics.Vic.Image
  11. {
  12. static class SurfaceReader
  13. {
  /// <summary>
  /// Reads the input surface described by <paramref name="config"/> and converts it
  /// into a <see cref="Surface"/>.
  /// </summary>
  /// <param name="rm">Resource manager providing the GPU memory manager and the surface pool</param>
  /// <param name="config">Slot surface configuration (pixel format, dimensions, layout)</param>
  /// <param name="offsets">Offsets of the surface planes</param>
  /// <returns>
  /// The decoded surface. For a pixel format this reader does not handle, an error is logged
  /// and a surface of the luma dimensions is returned without any pixel data being read into it.
  /// </returns>
  public static Surface Read(ResourceManager rm, ref SlotSurfaceConfig config, ref PlaneOffsets offsets)
  {
      // NV12 is the only format with a reader here.
      if (config.SlotPixelFormat == PixelFormat.Y8___V8U8_N420)
      {
          return ReadNv12(rm, ref config, ref offsets);
      }

      Logger.PrintError(LogClass.Vic, $"Unsupported pixel format \"{config.SlotPixelFormat}\".");

      // Best effort: still hand back a surface of the expected size so the caller can carry on.
      int fallbackWidth = config.SlotLumaWidth + 1;
      int fallbackHeight = config.SlotLumaHeight + 1;

      return new Surface(rm.SurfacePool, fallbackWidth, fallbackHeight);
  }
  /// <summary>
  /// Reads a two-plane surface (one byte per luma sample, two interleaved bytes per chroma sample)
  /// and expands it into a <see cref="Surface"/> with one output pixel per luma sample.
  /// </summary>
  /// <remarks>
  /// No colour conversion is done here: the luma byte goes to the R channel and the first and
  /// second byte of the chroma pair go to G and B. Every chroma pair is shared by two horizontally
  /// adjacent pixels (x and x + 1, x even) and by two consecutive rows. Alpha is always 0x3ff.
  /// </remarks>
  /// <param name="rm">Resource manager providing the GPU memory manager and the surface pool</param>
  /// <param name="config">Slot surface configuration</param>
  /// <param name="offsets">Offsets of the luma and chroma planes</param>
  /// <returns>The expanded surface</returns>
  private unsafe static Surface ReadNv12(ResourceManager rm, ref SlotSurfaceConfig config, ref PlaneOffsets offsets)
  {
      InputSurface input = ReadSurface(rm.Gmm, ref config, ref offsets, 1, 2);

      int width = input.Width;
      int height = input.Height;

      int yStride = GetPitch(width, 1);
      int uvStride = GetPitch(input.UvWidth, 2);

      Surface output = new Surface(rm.SurfacePool, width, height);

      if (Sse41.IsSupported)
      {
          // Per pixel, reorders the bytes [Y, 0, C0, C1] into [Y, C0, C1, 0].
          Vector128<byte> shufMask = Vector128.Create(
              (byte)0, (byte)2, (byte)3, (byte)1,
              (byte)4, (byte)6, (byte)7, (byte)5,
              (byte)8, (byte)10, (byte)11, (byte)9,
              (byte)12, (byte)14, (byte)15, (byte)13);

          // Full alpha in the fourth 16-bit lane of each pixel. It is OR'ed in AFTER the
          // shift below so the vector path writes 0x3ff, the same value as the scalar paths.
          Vector128<short> alphaMask = Vector128.Create(0x3ffUL << 48).AsInt16();

          int yStrideGap = yStride - width;

          // Largest multiple of 16 pixels that fits in a row; the rest is handled one pixel at a time.
          int widthTrunc = width & ~0xf;

          fixed (Pixel* dstPtr = output.Data)
          {
              Pixel* op = dstPtr;

              fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
              {
                  byte* i0p = src0Ptr;

                  for (int y = 0; y < height; y++)
                  {
                      // Two consecutive luma rows share one chroma row.
                      byte* uvRow = src1Ptr + (y >> 1) * uvStride;
                      byte* i1p = uvRow;

                      int x = 0;

                      for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                      {
                          // 16 luma bytes, zero-extended to 16-bit lanes.
                          Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p);
                          Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8);

                          // 8 chroma pairs; unpacking the vector with itself duplicates each
                          // pair so that two adjacent pixels receive the same chroma.
                          Vector128<byte> uv = Sse2.LoadVector128(i1p);

                          Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16());
                          Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16());

                          // Interleave luma and chroma: 4 bytes per pixel, 4 pixels per vector.
                          Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0);
                          Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0);
                          Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1);
                          Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1);

                          rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16();
                          rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16();
                          rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
                          rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();

                          // Widen every channel byte to 16 bits: 2 pixels per vector.
                          Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
                          Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
                          Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
                          Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte()));
                          Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte());
                          Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte()));
                          Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
                          Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));

                          // Scale the colour channels by 4 (presumably what Upsample does in the
                          // scalar paths; confirm in SurfaceCommon), then set alpha. The alpha lane
                          // is zero before the OR, so shifting it first leaves it untouched.
                          rgba16_0 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_0, 2), alphaMask);
                          rgba16_1 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_1, 2), alphaMask);
                          rgba16_2 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_2, 2), alphaMask);
                          rgba16_3 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_3, 2), alphaMask);
                          rgba16_4 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_4, 2), alphaMask);
                          rgba16_5 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_5, 2), alphaMask);
                          rgba16_6 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_6, 2), alphaMask);
                          rgba16_7 = Sse2.Or(Sse2.ShiftLeftLogical(rgba16_7, 2), alphaMask);

                          // Each store writes two pixels.
                          Sse2.Store((short*)(op + (uint)x + 0), rgba16_0);
                          Sse2.Store((short*)(op + (uint)x + 2), rgba16_1);
                          Sse2.Store((short*)(op + (uint)x + 4), rgba16_2);
                          Sse2.Store((short*)(op + (uint)x + 6), rgba16_3);
                          Sse2.Store((short*)(op + (uint)x + 8), rgba16_4);
                          Sse2.Store((short*)(op + (uint)x + 10), rgba16_5);
                          Sse2.Store((short*)(op + (uint)x + 12), rgba16_6);
                          Sse2.Store((short*)(op + (uint)x + 14), rgba16_7);
                      }

                      // Remaining pixels of the row (fewer than 16).
                      for (; x < width; x++)
                      {
                          Pixel* px = op + (uint)x;

                          // Pixels x and x + 1 (x even) share the chroma pair at byte offset (x & ~1),
                          // exactly as in the vector loop above and the non-SSE path below.
                          byte* uvp = uvRow + (x & ~1);

                          px->R = Upsample(*i0p++);
                          px->G = Upsample(uvp[0]);
                          px->B = Upsample(uvp[1]);
                          px->A = 0x3ff;
                      }

                      op += width;

                      // Skip the luma row padding; the chroma pointer is recomputed per row.
                      i0p += yStrideGap;
                  }
              }
          }
      }
      else
      {
          for (int y = 0; y < height; y++)
          {
              int uvBase = (y >> 1) * uvStride;

              for (int x = 0; x < width; x++)
              {
                  output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x]));

                  int uvOffs = uvBase + (x & ~1);

                  output.SetG(x, y, Upsample(input.Buffer1[uvOffs]));
                  output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1]));
                  output.SetA(x, y, 0x3ff);
              }
          }
      }

      return output;
  }
  /// <summary>
  /// Copies the upper 64 bits of <paramref name="value"/> into the lower 64 bits.
  /// The upper 64 bits of the result are the same as those of the input.
  /// </summary>
  /// <param name="value">Source vector</param>
  /// <returns>Vector with both halves equal to the upper half of <paramref name="value"/></returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static Vector128<byte> HighToLow(Vector128<byte> value)
  {
      // The move is only exposed for float vectors, so the bytes are reinterpreted around it.
      Vector128<float> lanes = value.AsSingle();

      return Sse.MoveHighToLow(lanes, lanes).AsByte();
  }
  /// <summary>
  /// Builds an <see cref="InputSurface"/> from the slot configuration and reads up to three
  /// of its planes from GPU memory.
  /// </summary>
  /// <param name="gmm">GPU memory manager the plane data is read from</param>
  /// <param name="config">Slot surface configuration (dimensions and block layout)</param>
  /// <param name="offsets">Offsets of the luma and chroma planes</param>
  /// <param name="bytesPerPixel">Bytes per pixel of the first (luma) plane</param>
  /// <param name="planes">Number of planes to read; planes beyond this count are left unset</param>
  /// <returns>The input surface with its dimensions and the requested plane buffers filled in</returns>
  private static InputSurface ReadSurface(
      MemoryManager gmm,
      ref SlotSurfaceConfig config,
      ref PlaneOffsets offsets,
      int bytesPerPixel,
      int planes)
  {
      // Every configured dimension is incremented by one to obtain the size used here.
      int lumaWidth = config.SlotLumaWidth + 1;
      int lumaHeight = config.SlotLumaHeight + 1;
      int chromaWidth = config.SlotChromaWidth + 1;
      int chromaHeight = config.SlotChromaHeight + 1;

      // A block kind of zero is treated as a linear layout; anything else as block linear.
      bool isLinear = config.SlotBlkKind == 0;
      int gobBlocksInY = 1 << config.SlotBlkHeight;

      InputSurface result = new InputSurface();

      result.Width = lumaWidth;
      result.Height = lumaHeight;
      result.UvWidth = chromaWidth;
      result.UvHeight = chromaHeight;

      if (planes >= 1)
      {
          result.Buffer0 = ReadBuffer(gmm, offsets.LumaOffset, isLinear, lumaWidth, lumaHeight, bytesPerPixel, gobBlocksInY);
      }

      if (planes >= 2)
      {
          // With exactly two planes the second one carries two bytes per sample,
          // otherwise each chroma plane carries one.
          int chromaBytesPerPixel = planes == 2 ? 2 : 1;

          result.Buffer1 = ReadBuffer(gmm, offsets.ChromaUOffset, isLinear, chromaWidth, chromaHeight, chromaBytesPerPixel, gobBlocksInY);
      }

      if (planes >= 3)
      {
          result.Buffer2 = ReadBuffer(gmm, offsets.ChromaVOffset, isLinear, chromaWidth, chromaHeight, 1, gobBlocksInY);
      }

      return result;
  }
  /// <summary>
  /// Reads one plane from GPU memory and returns it in linear (row by row) form.
  /// </summary>
  /// <param name="gmm">GPU memory manager the data is read from</param>
  /// <param name="offset">Plane offset, expanded with ExtendOffset before the read</param>
  /// <param name="linear">True if the plane is already linear in memory, false if it is block linear</param>
  /// <param name="width">Plane width in pixels</param>
  /// <param name="height">Plane height in pixels</param>
  /// <param name="bytesPerPixel">Bytes per pixel of the plane</param>
  /// <param name="gobBlocksInY">Block height in GOBs, only used for block linear planes</param>
  /// <returns>Plane data with a row pitch of GetPitch(width, bytesPerPixel)</returns>
  private static ReadOnlySpan<byte> ReadBuffer(
      MemoryManager gmm,
      uint offset,
      bool linear,
      int width,
      int height,
      int bytesPerPixel,
      int gobBlocksInY)
  {
      int pitch = GetPitch(width, bytesPerPixel);

      if (!linear)
      {
          // Block linear data has to be converted; the other overload takes care of it.
          return ReadBuffer(gmm, offset, width, height, pitch, bytesPerPixel, gobBlocksInY);
      }

      // Linear data can be returned straight from the memory manager.
      int byteCount = pitch * height;

      return gmm.GetSpan(ExtendOffset(offset), byteCount);
  }
  /// <summary>
  /// Reads a block linear plane from GPU memory and converts it into a newly allocated linear buffer.
  /// </summary>
  /// <param name="gmm">GPU memory manager the data is read from</param>
  /// <param name="offset">Plane offset, expanded with ExtendOffset before the read</param>
  /// <param name="width">Plane width in pixels</param>
  /// <param name="height">Plane height in pixels</param>
  /// <param name="dstStride">Row pitch, in bytes, of the returned linear buffer</param>
  /// <param name="bytesPerPixel">Bytes per pixel of the plane</param>
  /// <param name="gobBlocksInY">Block height in GOBs</param>
  /// <returns>Linear plane data of dstStride * height bytes</returns>
  private static ReadOnlySpan<byte> ReadBuffer(
      MemoryManager gmm,
      uint offset,
      int width,
      int height,
      int dstStride,
      int bytesPerPixel,
      int gobBlocksInY)
  {
      // Number of bytes the plane occupies in its block linear form.
      int sourceSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);

      ReadOnlySpan<byte> blockLinearData = gmm.GetSpan(ExtendOffset(offset), sourceSize);

      Span<byte> linearData = new byte[dstStride * height];

      LayoutConverter.ConvertBlockLinearToLinear(
          linearData,
          width,
          height,
          dstStride,
          bytesPerPixel,
          gobBlocksInY,
          blockLinearData);

      return linearData;
  }
  202. }
  203. }