SurfaceReader.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. using Ryujinx.Common.Logging;
  2. using Ryujinx.Common.Memory;
  3. using Ryujinx.Graphics.Texture;
  4. using Ryujinx.Graphics.Vic.Types;
  5. using System;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.Intrinsics;
  8. using System.Runtime.Intrinsics.Arm;
  9. using System.Runtime.Intrinsics.X86;
  10. using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
  11. namespace Ryujinx.Graphics.Vic.Image
  12. {
  13. static class SurfaceReader
  14. {
  /// <summary>
  /// Reads the input surface of a slot, selecting the reader from the slot pixel format.
  /// </summary>
  /// <param name="rm">Resource manager supplying the surface pool used for the result</param>
  /// <param name="config">Slot configuration, forwarded to the format specific reader</param>
  /// <param name="surfaceConfig">Slot surface configuration (pixel format and luma dimensions are read here)</param>
  /// <param name="offsets">Plane offsets, forwarded to the format specific reader</param>
  /// <returns>
  /// The surface produced by the format specific reader, or, when the pixel format is not handled,
  /// a newly constructed surface sized from the luma dimensions (an error is logged in that case)
  /// </returns>
  public static Surface Read(
      ResourceManager rm,
      ref SlotConfig config,
      ref SlotSurfaceConfig surfaceConfig,
      ref Array8<PlaneOffsets> offsets)
  {
      // NV12 is the only format with a dedicated reader.
      if (surfaceConfig.SlotPixelFormat == PixelFormat.Y8___V8U8_N420)
      {
          return ReadNv12(rm, ref config, ref surfaceConfig, ref offsets);
      }

      Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{surfaceConfig.SlotPixelFormat}\".");

      // The configured luma width/height are incremented by one to obtain the surface size
      // (presumably the registers store "size minus one" -- TODO confirm).
      int fallbackWidth = surfaceConfig.SlotLumaWidth + 1;
      int fallbackHeight = surfaceConfig.SlotLumaHeight + 1;

      return new Surface(rm.SurfacePool, fallbackWidth, fallbackHeight);
  }
  /// <summary>
  /// Reads a two-plane surface (one byte per luma sample, two interleaved bytes per chroma sample,
  /// chroma rows shared by two luma rows) and expands it into a <see cref="Surface"/>.
  /// </summary>
  /// <remarks>
  /// For every output pixel, R receives the luma sample, G and B receive the first and second byte
  /// of the chroma pair covering that pixel, and A is set to its maximum. Each chroma pair is shared
  /// by a 2x2 group of pixels. SSE4.1 and AdvSimd (Arm64) paths convert 16 pixels per iteration and
  /// fall back to scalar code for the remaining columns; a fully scalar path is used otherwise.
  /// </remarks>
  /// <param name="rm">Resource manager supplying the surface and buffer pools</param>
  /// <param name="config">Slot configuration</param>
  /// <param name="surfaceConfig">Slot surface configuration</param>
  /// <param name="offsets">Plane offsets of the input surface</param>
  /// <returns>The converted surface</returns>
  private static unsafe Surface ReadNv12(
      ResourceManager rm,
      ref SlotConfig config,
      ref SlotSurfaceConfig surfaceConfig,
      ref Array8<PlaneOffsets> offsets)
  {
      InputSurface input = ReadSurface(rm, ref config, ref surfaceConfig, ref offsets, 1, 2);

      int width = input.Width;
      int height = input.Height;

      int yStride = GetPitch(width, 1);
      int uvStride = GetPitch(input.UvWidth, 2);

      Surface output = new Surface(rm.SurfacePool, width, height);

      if (Sse41.IsSupported)
      {
          // Reorders the bytes of every pixel from (Y, 0, C0, C1) to (Y, C0, C1, 0).
          Vector128<byte> shufMask = Vector128.Create(
              (byte)0, (byte)2, (byte)3, (byte)1,
              (byte)4, (byte)6, (byte)7, (byte)5,
              (byte)8, (byte)10, (byte)11, (byte)9,
              (byte)12, (byte)14, (byte)15, (byte)13);

          // Sets the (so far zero) fourth byte of every pixel to 0xff.
          Vector128<short> alphaMask = Vector128.Create(0xff << 24).AsInt16();

          int yStrideGap = yStride - width;

          // Number of columns that can be handled 16 at a time.
          int widthTrunc = width & ~0xf;

          fixed (Pixel* dstPtr = output.Data)
          {
              Pixel* op = dstPtr;

              fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
              {
                  byte* i0p = src0Ptr;

                  for (int y = 0; y < height; y++)
                  {
                      // Two consecutive luma rows share one chroma row.
                      byte* i1p = src1Ptr + (y >> 1) * uvStride;

                      int x = 0;

                      for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                      {
                          // 16 luma samples, zero extended to 16 bits.
                          Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p);
                          Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8);

                          // 8 chroma pairs, each duplicated so that two pixels share one pair.
                          Vector128<byte> uv = Sse2.LoadVector128(i1p);

                          Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16());
                          Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16());

                          // Interleave luma and chroma: 4 pixels of 4 bytes per vector.
                          Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0);
                          Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0);
                          Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1);
                          Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1);

                          rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16();
                          rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16();
                          rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
                          rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();

                          rgba0 = Sse2.Or(rgba0, alphaMask);
                          rgba1 = Sse2.Or(rgba1, alphaMask);
                          rgba2 = Sse2.Or(rgba2, alphaMask);
                          rgba3 = Sse2.Or(rgba3, alphaMask);

                          // Widen every component to 16 bits: 2 pixels per vector.
                          Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
                          Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
                          Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
                          Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte()));
                          Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte());
                          Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte()));
                          Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
                          Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));

                          // Scale the 8-bit components up by two bits.
                          rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2);
                          rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2);
                          rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2);
                          rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2);
                          rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2);
                          rgba16_5 = Sse2.ShiftLeftLogical(rgba16_5, 2);
                          rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2);
                          rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2);

                          Sse2.Store((short*)(op + (uint)x + 0), rgba16_0);
                          Sse2.Store((short*)(op + (uint)x + 2), rgba16_1);
                          Sse2.Store((short*)(op + (uint)x + 4), rgba16_2);
                          Sse2.Store((short*)(op + (uint)x + 6), rgba16_3);
                          Sse2.Store((short*)(op + (uint)x + 8), rgba16_4);
                          Sse2.Store((short*)(op + (uint)x + 10), rgba16_5);
                          Sse2.Store((short*)(op + (uint)x + 12), rgba16_6);
                          Sse2.Store((short*)(op + (uint)x + 14), rgba16_7);
                      }

                      // Scalar tail. The vector loop always stops on an even column, and every
                      // chroma pair covers two horizontally adjacent pixels, so the chroma pointer
                      // must only advance once the odd pixel of a pair has been written, that is
                      // when the incremented x is even again.
                      for (; x < width; x++, i1p += ((x & 1) == 0) ? 2 : 0)
                      {
                          Pixel* px = op + (uint)x;

                          px->R = Upsample(*i0p++);
                          px->G = Upsample(*i1p);
                          px->B = Upsample(*(i1p + 1));
                          px->A = 0x3ff;
                      }

                      op += width;

                      // Skip the luma row padding; the chroma pointer is recomputed per row.
                      i0p += yStrideGap;
                  }
              }
          }
      }
      else if (AdvSimd.Arm64.IsSupported)
      {
          // Added to every 32-bit pixel to set its fourth byte to 0xff.
          Vector128<int> alphaMask = Vector128.Create(0xffu << 24).AsInt32();

          int yStrideGap = yStride - width;

          // Number of columns that can be handled 16 at a time.
          int widthTrunc = width & ~0xf;

          fixed (Pixel* dstPtr = output.Data)
          {
              Pixel* op = dstPtr;

              fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
              {
                  byte* i0p = src0Ptr;

                  for (int y = 0; y < height; y++)
                  {
                      // Two consecutive luma rows share one chroma row.
                      byte* i1p = src1Ptr + (y >> 1) * uvStride;

                      int x = 0;

                      for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                      {
                          Vector128<byte> ya = AdvSimd.LoadVector128(i0p);
                          Vector128<byte> uv = AdvSimd.LoadVector128(i1p);

                          // 16 luma samples, zero extended to 16 bits.
                          Vector128<short> ya0 = AdvSimd.ZeroExtendWideningLower(ya.GetLower()).AsInt16();
                          Vector128<short> ya1 = AdvSimd.ZeroExtendWideningUpper(ya).AsInt16();

                          // 8 chroma pairs, each duplicated so that two pixels share one pair.
                          Vector128<short> uv0 = AdvSimd.Arm64.ZipLow(uv.AsInt16(), uv.AsInt16());
                          Vector128<short> uv1 = AdvSimd.Arm64.ZipHigh(uv.AsInt16(), uv.AsInt16());

                          // Move luma to the high byte so each pixel becomes (0, Y, C0, C1) once zipped.
                          ya0 = AdvSimd.ShiftLeftLogical(ya0, 8);
                          ya1 = AdvSimd.ShiftLeftLogical(ya1, 8);

                          Vector128<short> rgba0 = AdvSimd.Arm64.ZipLow(ya0, uv0);
                          Vector128<short> rgba1 = AdvSimd.Arm64.ZipHigh(ya0, uv0);
                          Vector128<short> rgba2 = AdvSimd.Arm64.ZipLow(ya1, uv1);
                          Vector128<short> rgba3 = AdvSimd.Arm64.ZipHigh(ya1, uv1);

                          // Shift every pixel down to (Y, C0, C1, 0) and add the alpha byte.
                          rgba0 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba0.AsInt32(), 8).AsInt16();
                          rgba1 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba1.AsInt32(), 8).AsInt16();
                          rgba2 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba2.AsInt32(), 8).AsInt16();
                          rgba3 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba3.AsInt32(), 8).AsInt16();

                          // Widen every component to 16 bits: 2 pixels per vector.
                          Vector128<short> rgba16_0 = AdvSimd.ZeroExtendWideningLower(rgba0.AsByte().GetLower()).AsInt16();
                          Vector128<short> rgba16_1 = AdvSimd.ZeroExtendWideningUpper(rgba0.AsByte()).AsInt16();
                          Vector128<short> rgba16_2 = AdvSimd.ZeroExtendWideningLower(rgba1.AsByte().GetLower()).AsInt16();
                          Vector128<short> rgba16_3 = AdvSimd.ZeroExtendWideningUpper(rgba1.AsByte()).AsInt16();
                          Vector128<short> rgba16_4 = AdvSimd.ZeroExtendWideningLower(rgba2.AsByte().GetLower()).AsInt16();
                          Vector128<short> rgba16_5 = AdvSimd.ZeroExtendWideningUpper(rgba2.AsByte()).AsInt16();
                          Vector128<short> rgba16_6 = AdvSimd.ZeroExtendWideningLower(rgba3.AsByte().GetLower()).AsInt16();
                          Vector128<short> rgba16_7 = AdvSimd.ZeroExtendWideningUpper(rgba3.AsByte()).AsInt16();

                          // Scale the 8-bit components up by two bits.
                          rgba16_0 = AdvSimd.ShiftLeftLogical(rgba16_0, 2);
                          rgba16_1 = AdvSimd.ShiftLeftLogical(rgba16_1, 2);
                          rgba16_2 = AdvSimd.ShiftLeftLogical(rgba16_2, 2);
                          rgba16_3 = AdvSimd.ShiftLeftLogical(rgba16_3, 2);
                          rgba16_4 = AdvSimd.ShiftLeftLogical(rgba16_4, 2);
                          rgba16_5 = AdvSimd.ShiftLeftLogical(rgba16_5, 2);
                          rgba16_6 = AdvSimd.ShiftLeftLogical(rgba16_6, 2);
                          rgba16_7 = AdvSimd.ShiftLeftLogical(rgba16_7, 2);

                          AdvSimd.Store((short*)(op + (uint)x + 0), rgba16_0);
                          AdvSimd.Store((short*)(op + (uint)x + 2), rgba16_1);
                          AdvSimd.Store((short*)(op + (uint)x + 4), rgba16_2);
                          AdvSimd.Store((short*)(op + (uint)x + 6), rgba16_3);
                          AdvSimd.Store((short*)(op + (uint)x + 8), rgba16_4);
                          AdvSimd.Store((short*)(op + (uint)x + 10), rgba16_5);
                          AdvSimd.Store((short*)(op + (uint)x + 12), rgba16_6);
                          AdvSimd.Store((short*)(op + (uint)x + 14), rgba16_7);
                      }

                      // Scalar tail. The vector loop always stops on an even column, and every
                      // chroma pair covers two horizontally adjacent pixels, so the chroma pointer
                      // must only advance once the odd pixel of a pair has been written, that is
                      // when the incremented x is even again.
                      for (; x < width; x++, i1p += ((x & 1) == 0) ? 2 : 0)
                      {
                          Pixel* px = op + (uint)x;

                          px->R = Upsample(*i0p++);
                          px->G = Upsample(*i1p);
                          px->B = Upsample(*(i1p + 1));
                          px->A = 0x3ff;
                      }

                      op += width;

                      // Skip the luma row padding; the chroma pointer is recomputed per row.
                      i0p += yStrideGap;
                  }
              }
          }
      }
      else
      {
          for (int y = 0; y < height; y++)
          {
              // Two consecutive luma rows share one chroma row.
              int uvBase = (y >> 1) * uvStride;

              for (int x = 0; x < width; x++)
              {
                  output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x]));

                  // Clearing bit 0 makes both pixels of a pair read the same two chroma bytes.
                  int uvOffs = uvBase + (x & ~1);

                  output.SetG(x, y, Upsample(input.Buffer1[uvOffs]));
                  output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1]));
                  output.SetA(x, y, 0x3ff);
              }
          }
      }

      // The rented input planes are no longer needed once the output has been written.
      input.Return(rm.BufferPool);

      return output;
  }
  /// <summary>
  /// Copies the upper 64 bits of a vector into its lower 64 bits (the upper half is kept as is).
  /// Uses the SSE "move high to low" operation, so it must only be called when SSE is supported.
  /// </summary>
  /// <param name="value">Source vector</param>
  /// <returns>A vector whose two halves both hold the upper half of <paramref name="value"/></returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static Vector128<byte> HighToLow(Vector128<byte> value)
  {
      // The intrinsic is only exposed for single precision vectors; the reinterpretation is free.
      Vector128<float> lanes = value.AsSingle();
      Vector128<float> moved = Sse.MoveHighToLow(lanes, lanes);

      return moved.AsByte();
  }
  /// <summary>
  /// Builds an <see cref="InputSurface"/> by reading up to three planes of a slot surface.
  /// </summary>
  /// <param name="rm">Resource manager used to read and rent the plane buffers</param>
  /// <param name="config">Slot configuration (the frame format is read here)</param>
  /// <param name="surfaceConfig">Slot surface configuration (dimensions and block layout)</param>
  /// <param name="offsets">Plane offsets of the input surface</param>
  /// <param name="bytesPerPixel">Bytes per sample of the first plane</param>
  /// <param name="planes">Number of planes to read; values above three read three planes</param>
  /// <returns>The input surface with its dimensions and the requested plane buffers set</returns>
  private static InputSurface ReadSurface(
      ResourceManager rm,
      ref SlotConfig config,
      ref SlotSurfaceConfig surfaceConfig,
      ref Array8<PlaneOffsets> offsets,
      int bytesPerPixel,
      int planes)
  {
      InputSurface result = new InputSurface();

      result.Initialize();

      int gobBlocksInY = 1 << surfaceConfig.SlotBlkHeight;

      // A block kind of zero selects the linear (pitch) reader instead of the block linear one.
      bool linear = surfaceConfig.SlotBlkKind == 0;

      int lumaWidth = surfaceConfig.SlotLumaWidth + 1;
      int lumaHeight = surfaceConfig.SlotLumaHeight + 1;
      int chromaWidth = surfaceConfig.SlotChromaWidth + 1;
      int chromaHeight = surfaceConfig.SlotChromaHeight + 1;

      // Interlaced inputs have double the height when deinterlaced.
      int fieldShift = config.FrameFormat.IsField() ? 1 : 0;

      result.Width = lumaWidth;
      result.Height = lumaHeight << fieldShift;
      result.UvWidth = chromaWidth;
      result.UvHeight = chromaHeight << fieldShift;

      if (planes <= 0)
      {
          return result;
      }

      result.SetBuffer0(ReadBuffer(rm, ref config, ref offsets, linear, 0, lumaWidth, lumaHeight, bytesPerPixel, gobBlocksInY));

      if (planes <= 1)
      {
          return result;
      }

      // With exactly two planes the second one is read with two bytes per sample,
      // otherwise the second and third planes use one byte per sample each.
      int secondPlaneBpp = planes == 2 ? 2 : 1;

      result.SetBuffer1(ReadBuffer(rm, ref config, ref offsets, linear, 1, chromaWidth, chromaHeight, secondPlaneBpp, gobBlocksInY));

      if (planes <= 2)
      {
          return result;
      }

      result.SetBuffer2(ReadBuffer(rm, ref config, ref offsets, linear, 2, chromaWidth, chromaHeight, 1, gobBlocksInY));

      return result;
  }
  /// <summary>
  /// Reads one plane of the current frame/field into a rented buffer and, for field or
  /// interlaced frame formats, applies the configured deinterlace mode to it.
  /// </summary>
  /// <param name="rm">Resource manager used to read memory and rent buffers</param>
  /// <param name="config">Slot configuration (frame format, deinterlace mode, field enables)</param>
  /// <param name="offsets">Plane offsets; entry 0 is the current surface, entries 1 and 2 are read
  /// as the previous and next fields when those are enabled</param>
  /// <param name="linear">True to read a linear layout, false for block linear</param>
  /// <param name="plane">Plane index (0 is treated as luma)</param>
  /// <param name="width">Plane width in samples</param>
  /// <param name="height">Plane height in rows, as stored in memory</param>
  /// <param name="bytesPerPixel">Bytes per sample</param>
  /// <param name="gobBlocksInY">Block height in GOBs for the block linear layout</param>
  /// <returns>The rented buffer holding the plane</returns>
  private static RentedBuffer ReadBuffer(
      ResourceManager rm,
      scoped ref SlotConfig config,
      scoped ref Array8<PlaneOffsets> offsets,
      bool linear,
      int plane,
      int width,
      int height,
      int bytesPerPixel,
      int gobBlocksInY)
  {
      FrameFormat format = config.FrameFormat;

      bool lumaPlane = plane == 0;
      bool fieldInput = format.IsField();
      bool topField = format.IsTopField(lumaPlane);

      int pitch = GetPitch(width, bytesPerPixel);
      uint gpuOffset = GetOffset(ref offsets[0], plane);

      // A field only fills every other row of the destination: a bottom field starts
      // one row down, and consecutive source rows land two destination rows apart.
      int firstRowOffset = fieldInput && !topField ? pitch : 0;
      int rowAdvance = fieldInput ? pitch * 2 : pitch;

      RentedBuffer current = linear
          ? ReadBufferLinear(rm, gpuOffset, width, height, firstRowOffset, rowAdvance, bytesPerPixel)
          : ReadBufferBlockLinear(rm, gpuOffset, width, height, firstRowOffset, rowAdvance, bytesPerPixel, gobBlocksInY);

      // Progressive input needs no further processing.
      if (!fieldInput && !format.IsInterlaced())
      {
          return current;
      }

      RentedBuffer previous = RentedBuffer.Empty;
      RentedBuffer next = RentedBuffer.Empty;

      if (config.PrevFieldEnable)
      {
          previous = ReadBufferNoDeinterlace(rm, ref offsets[1], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
      }

      if (config.NextFieldEnable)
      {
          next = ReadBufferNoDeinterlace(rm, ref offsets[2], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
      }

      int rowBytes = width * bytesPerPixel;

      switch (config.DeinterlaceMode)
      {
          // The combined mode weaves the luma plane and bobs the other planes.
          case DeinterlaceMode.Weave:
          case DeinterlaceMode.WeaveLumaBobFieldChroma when lumaPlane:
              Scaler.DeinterlaceWeave(current.Data, previous.Data, rowBytes, pitch, topField);
              break;

          case DeinterlaceMode.BobField:
          case DeinterlaceMode.WeaveLumaBobFieldChroma:
              Scaler.DeinterlaceBob(current.Data, rowBytes, pitch, topField);
              break;

          case DeinterlaceMode.Bob:
          {
              // The parity comes from the slot configuration and is flipped
              // for bottom-first interlaced content.
              bool currentIsTop = lumaPlane ? config.IsEven : config.ChromaEven;

              Scaler.DeinterlaceBob(current.Data, rowBytes, pitch, currentIsTop ^ format.IsInterlacedBottomFirst());
              break;
          }

          case DeinterlaceMode.NewBob:
          case DeinterlaceMode.Disi1:
              Scaler.DeinterlaceMotionAdaptive(current.Data, previous.Data, next.Data, rowBytes, pitch, topField);
              break;

          default:
              Logger.Error?.Print(LogClass.Vic, $"Unsupported deinterlace mode \"{config.DeinterlaceMode}\".");
              break;
      }

      // The neighbouring fields were only needed as deinterlacing references.
      previous.Return(rm.BufferPool);
      next.Return(rm.BufferPool);

      return current;
  }
  /// <summary>
  /// Selects the offset of a plane from a set of plane offsets.
  /// </summary>
  /// <param name="offsets">Offsets of the luma and the two chroma planes</param>
  /// <param name="plane">Plane index: 0 for luma, 1 for chroma U, 2 for chroma V</param>
  /// <returns>The offset stored for the requested plane</returns>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="plane"/> is not 0, 1 or 2</exception>
  private static uint GetOffset(ref PlaneOffsets offsets, int plane)
  {
      switch (plane)
      {
          case 0:
              return offsets.LumaOffset;
          case 1:
              return offsets.ChromaUOffset;
          case 2:
              return offsets.ChromaVOffset;
          default:
              throw new ArgumentOutOfRangeException(nameof(plane));
      }
  }
  /// <summary>
  /// Reads one plane into a rented buffer with tightly packed rows (no field interleaving
  /// and no deinterlacing).
  /// </summary>
  /// <param name="rm">Resource manager used to read memory and rent buffers</param>
  /// <param name="offsets">Plane offsets of the surface to read</param>
  /// <param name="linear">True to read a linear layout, false for block linear</param>
  /// <param name="plane">Plane index</param>
  /// <param name="width">Plane width in samples</param>
  /// <param name="height">Plane height in rows</param>
  /// <param name="bytesPerPixel">Bytes per sample</param>
  /// <param name="gobBlocksInY">Block height in GOBs for the block linear layout</param>
  /// <returns>The rented buffer holding the plane</returns>
  private static RentedBuffer ReadBufferNoDeinterlace(
      ResourceManager rm,
      ref PlaneOffsets offsets,
      bool linear,
      int plane,
      int width,
      int height,
      int bytesPerPixel,
      int gobBlocksInY)
  {
      int pitch = GetPitch(width, bytesPerPixel);
      uint gpuOffset = GetOffset(ref offsets, plane);

      // Rows start at the beginning of the buffer and are one pitch apart.
      return linear
          ? ReadBufferLinear(rm, gpuOffset, width, height, 0, pitch, bytesPerPixel)
          : ReadBufferBlockLinear(rm, gpuOffset, width, height, 0, pitch, bytesPerPixel, gobBlocksInY);
  }
  /// <summary>
  /// Copies a linear (pitch) plane from GPU mapped memory into a rented buffer, row by row.
  /// </summary>
  /// <param name="rm">Resource manager used to read memory and rent the buffer</param>
  /// <param name="offset">Offset of the plane, extended with <c>ExtendOffset</c> before reading</param>
  /// <param name="width">Plane width in samples</param>
  /// <param name="height">Number of rows to copy</param>
  /// <param name="dstStart">Byte offset of the first row inside the destination</param>
  /// <param name="dstStride">Distance in bytes between consecutive destination rows</param>
  /// <param name="bytesPerPixel">Bytes per sample</param>
  /// <returns>The rented buffer, limited to <c>dstStride * height</c> bytes</returns>
  private static RentedBuffer ReadBufferLinear(
      ResourceManager rm,
      uint offset,
      int width,
      int height,
      int dstStart,
      int dstStride,
      int bytesPerPixel)
  {
      int rowPitch = GetPitch(width, bytesPerPixel);

      ReadOnlySpan<byte> source = rm.Gmm.GetSpan(ExtendOffset(offset), rowPitch * height);

      int totalBytes = dstStride * height;
      int poolIndex = rm.BufferPool.RentMinimum(totalBytes, out byte[] rented);

      // The pool may hand out a larger array; only expose the part that is needed.
      Span<byte> destination = rented.AsSpan(0, totalBytes);

      int sourcePos = 0;
      int destinationPos = dstStart;

      // Full source rows (one pitch each) are copied; destination rows may be further apart.
      for (int row = 0; row < height; row++)
      {
          source.Slice(sourcePos, rowPitch).CopyTo(destination.Slice(destinationPos, rowPitch));

          sourcePos += rowPitch;
          destinationPos += dstStride;
      }

      return new RentedBuffer(destination, poolIndex);
  }
  /// <summary>
  /// Reads a block linear plane from GPU mapped memory and converts it to linear rows
  /// inside a rented buffer.
  /// </summary>
  /// <param name="rm">Resource manager used to read memory and rent the buffer</param>
  /// <param name="offset">Offset of the plane, extended with <c>ExtendOffset</c> before reading</param>
  /// <param name="width">Plane width in samples</param>
  /// <param name="height">Plane height in rows</param>
  /// <param name="dstStart">Byte offset of the first row inside the destination</param>
  /// <param name="dstStride">Distance in bytes between consecutive destination rows</param>
  /// <param name="bytesPerPixel">Bytes per sample</param>
  /// <param name="gobBlocksInY">Block height in GOBs</param>
  /// <returns>The rented buffer, limited to <c>dstStride * height</c> bytes</returns>
  private static RentedBuffer ReadBufferBlockLinear(
      ResourceManager rm,
      uint offset,
      int width,
      int height,
      int dstStart,
      int dstStride,
      int bytesPerPixel,
      int gobBlocksInY)
  {
      int sourceBytes = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);

      ReadOnlySpan<byte> source = rm.Gmm.GetSpan(ExtendOffset(offset), sourceBytes);

      int totalBytes = dstStride * height;
      int poolIndex = rm.BufferPool.RentMinimum(totalBytes, out byte[] rented);

      // The pool may hand out a larger array; only expose the part that is needed.
      Span<byte> destination = rented.AsSpan(0, totalBytes);

      // The converter writes from the first row offset onwards, using the destination stride.
      Span<byte> firstRow = destination.Slice(dstStart);

      LayoutConverter.ConvertBlockLinearToLinear(firstRow, width, height, dstStride, bytesPerPixel, gobBlocksInY, source);

      return new RentedBuffer(destination, poolIndex);
  }
  405. }
  406. }