DmaClass.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. using Ryujinx.Common;
  2. using Ryujinx.Graphics.Device;
  3. using Ryujinx.Graphics.Gpu.Engine.Threed;
  4. using Ryujinx.Graphics.Gpu.Memory;
  5. using Ryujinx.Graphics.Texture;
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Runtime.CompilerServices;
  9. using System.Runtime.Intrinsics;
  10. namespace Ryujinx.Graphics.Gpu.Engine.Dma
  11. {
  12. /// <summary>
  13. /// Represents a DMA copy engine class.
  14. /// </summary>
  15. class DmaClass : IDeviceState
  16. {
  /// <summary>
  /// GPU context. In this class it supplies the timestamp written by four word semaphore releases.
  /// </summary>
  private readonly GpuContext _context;
  /// <summary>
  /// GPU channel. Its memory manager is used for every read and write performed by this class.
  /// </summary>
  private readonly GpuChannel _channel;
  /// <summary>
  /// 3D engine. CreatePendingSyncs and FlushUboDirty are called on it before each copy is performed.
  /// </summary>
  private readonly ThreedClass _3dEngine;
  /// <summary>
  /// Register state of the DMA class; a write to the LaunchDma register triggers the copy.
  /// </summary>
  private readonly DeviceState<DmaClassState> _state;
  /// <summary>
  /// Copy flags passed on DMA launch.
  /// </summary>
  /// <remarks>
  /// The values are bit masks applied directly to the LaunchDma argument.
  /// </remarks>
  [Flags]
  private enum CopyFlags
  {
      /// <summary>Bit 7: the source is treated as linear instead of block linear.</summary>
      SrcLinear = 0x080,

      /// <summary>Bit 8: the destination is treated as linear instead of block linear.</summary>
      DstLinear = 0x100,

      /// <summary>Bit 9: selects the 2D (multi line) copy path.</summary>
      MultiLineEnable = 0x200,

      /// <summary>Bit 10: component remapping is enabled.</summary>
      RemapEnable = 0x400
  }
  /// <summary>
  /// Creates a new instance of the DMA copy engine class.
  /// </summary>
  /// <param name="context">GPU context</param>
  /// <param name="channel">GPU channel</param>
  /// <param name="threedEngine">3D engine</param>
  public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
  {
      _context = context;
      _channel = channel;
      _3dEngine = threedEngine;

      // Only the LaunchDma register has a callback; it is registered as a write callback (no read callback).
      var registerCallbacks = new Dictionary<string, RwCallback>
      {
          [nameof(DmaClassState.LaunchDma)] = new RwCallback(LaunchDma, null)
      };

      _state = new DeviceState<DmaClassState>(registerCallbacks);
  }
  /// <summary>
  /// Reads data from the class registers.
  /// </summary>
  /// <param name="offset">Register byte offset</param>
  /// <returns>Data at the specified offset</returns>
  public int Read(int offset)
  {
      // Reads are forwarded unchanged to the register state.
      return _state.Read(offset);
  }
  /// <summary>
  /// Writes data to the class registers.
  /// </summary>
  /// <param name="offset">Register byte offset</param>
  /// <param name="data">Data to be written</param>
  public void Write(int offset, int data)
  {
      // Writes are forwarded unchanged to the register state.
      _state.Write(offset, data);
  }
  /// <summary>
  /// Determine if a buffer-to-texture region covers the entirety of a texture.
  /// </summary>
  /// <param name="tex">Texture to compare</param>
  /// <param name="linear">True if the texture is linear, false if block linear</param>
  /// <param name="bpp">Texture bytes per pixel</param>
  /// <param name="stride">Texture stride</param>
  /// <param name="xCount">Number of pixels to be copied</param>
  /// <param name="yCount">Number of lines to be copied</param>
  /// <returns>True if the copied region spans the whole texture, false otherwise</returns>
  private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
  {
      if (!linear)
      {
          int blockLinearAlign = Constants.GobAlignment / bpp;

          // The region must start at the texture origin...
          if (tex.RegionX != 0 || tex.RegionY != 0)
          {
              return false;
          }

          // ...and match the texture size, with the copied width rounded up to the alignment.
          return tex.Width == BitUtils.AlignUp(xCount, blockLinearAlign) && tex.Height == yCount;
      }

      // A negative stride means the texture has to be flipped, which the fast copy
      // does not handle trivially; report it (and a zero stride) as incomplete so the slow path is used.
      if (stride <= 0)
      {
          return false;
      }

      int linearAlign = Constants.StrideAlignment / bpp;
      int alignedWidth = BitUtils.AlignUp(xCount, linearAlign);

      return stride / bpp == alignedWidth;
  }
  /// <summary>
  /// Releases a semaphore for a given LaunchDma method call.
  /// </summary>
  /// <remarks>
  /// Nothing is written when the semaphore type encoded in the argument is None.
  /// </remarks>
  /// <param name="argument">The LaunchDma call argument</param>
  private void ReleaseSemaphore(int argument)
  {
      // The semaphore type is stored in bits 3-4 of the launch argument.
      var semaphoreType = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3);

      if (semaphoreType == LaunchDmaSemaphoreType.None)
      {
          return;
      }

      ulong semaphoreVa = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;
      var payload = _state.State.SetSemaphorePayload;

      if (semaphoreType != LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
      {
          // Every remaining type is handled as a four word release:
          // the timestamp goes to offset 8 first, then the payload is written widened to 64 bits.
          _channel.MemoryManager.Write(semaphoreVa + 8, _context.GetTimestamp());
          _channel.MemoryManager.Write(semaphoreVa, (ulong)payload);

          return;
      }

      // One word release: only the payload is written, at its register width.
      _channel.MemoryManager.Write(semaphoreVa, payload);
  }
  /// <summary>
  /// Performs a buffer to buffer, or buffer to texture copy.
  /// </summary>
  /// <remarks>
  /// Nothing is copied when the LineLengthIn register is zero.
  /// When <see cref="CopyFlags.MultiLineEnable"/> is set, a rectangle of LineLengthIn by LineCount pixels is copied
  /// between the source and destination layouts. Otherwise LineLengthIn elements are copied in one dimension, or
  /// filled with a constant when the remap state matches the clear pattern checked below.
  /// </remarks>
  /// <param name="argument">The LaunchDma call argument</param>
  /// <exception cref="NotSupportedException">
  /// Thrown on the per-pixel 2D path when the source pixel size is not 1, 2, 4, 8, 12 or 16 bytes
  /// </exception>
  private void DmaCopy(int argument)
  {
      var memoryManager = _channel.MemoryManager;

      CopyFlags copyFlags = (CopyFlags)argument;

      bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
      bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
      bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
      bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);

      uint size = _state.State.LineLengthIn;

      if (size == 0)
      {
          return;
      }

      ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
      ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;

      int xCount = (int)_state.State.LineLengthIn;
      int yCount = (int)_state.State.LineCount;

      _3dEngine.CreatePendingSyncs();
      _3dEngine.FlushUboDirty();

      if (copy2D)
      {
          // Buffer to texture copy.
          // Without remap the data is handled as single bytes; with remap the pixel size
          // is the component count times the component size.
          int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
          int srcBpp = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1;
          int dstBpp = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1;

          // The texture descriptors are read by reinterpreting the registers starting at the block size register.
          var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
          var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);

          // The region origin is only taken from the descriptor for block linear surfaces.
          int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;

          if (!srcLinear)
          {
              srcRegionX = src.RegionX;
              srcRegionY = src.RegionY;
          }

          if (!dstLinear)
          {
              dstRegionX = dst.RegionX;
              dstRegionY = dst.RegionY;
          }

          int srcStride = (int)_state.State.PitchIn;
          int dstStride = (int)_state.State.PitchOut;

          var srcCalculator = new OffsetCalculator(
              src.Width,
              src.Height,
              srcStride,
              srcLinear,
              src.MemoryLayout.UnpackGobBlocksInY(),
              src.MemoryLayout.UnpackGobBlocksInZ(),
              srcBpp);

          var dstCalculator = new OffsetCalculator(
              dst.Width,
              dst.Height,
              dstStride,
              dstLinear,
              dst.MemoryLayout.UnpackGobBlocksInY(),
              dst.MemoryLayout.UnpackGobBlocksInZ(),
              dstBpp);

          (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
          (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);

          // With a negative stride, move the base offset back by (yCount - 1) lines.
          if (srcLinear && srcStride < 0)
          {
              srcBaseOffset += srcStride * (yCount - 1);
          }

          if (dstLinear && dstStride < 0)
          {
              dstBaseOffset += dstStride * (yCount - 1);
          }

          ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);

          bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
          bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);

          if (completeSource && completeDest)
          {
              // Both sides cover their whole surface: try to upload straight into a cached texture.
              var target = memoryManager.Physical.TextureCache.FindTexture(
                  memoryManager,
                  dstGpuVa,
                  dstBpp,
                  dstStride,
                  dst.Height,
                  xCount,
                  yCount,
                  dstLinear,
                  dst.MemoryLayout.UnpackGobBlocksInY(),
                  dst.MemoryLayout.UnpackGobBlocksInZ());

              if (target != null)
              {
                  byte[] data;

                  if (srcLinear)
                  {
                      data = LayoutConverter.ConvertLinearStridedToLinear(
                          target.Info.Width,
                          target.Info.Height,
                          1,
                          1,
                          xCount * srcBpp,
                          srcStride,
                          target.Info.FormatInfo.BytesPerPixel,
                          srcSpan);
                  }
                  else
                  {
                      data = LayoutConverter.ConvertBlockLinearToLinear(
                          src.Width,
                          src.Height,
                          src.Depth,
                          1,
                          1,
                          1,
                          1,
                          1,
                          srcBpp,
                          src.MemoryLayout.UnpackGobBlocksInY(),
                          src.MemoryLayout.UnpackGobBlocksInZ(),
                          1,
                          new SizeInfo((int)target.Size),
                          srcSpan);
                  }

                  target.SynchronizeMemory();
                  target.SetData(data);
                  target.SignalModified();

                  return;
              }
              else if (srcCalculator.LayoutMatches(dstCalculator))
              {
                  // No layout conversion has to be performed, just copy the data entirely.
                  memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);

                  return;
              }
          }

          // Copies the rectangle from srcSpan into dstSpan, moving one T per pixel on the general path.
          // Always returns true; the return value only exists so it can be called from a switch expression.
          unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
          {
              if (srcLinear && dstLinear && srcBpp == dstBpp)
              {
                  // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
                  // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
                  for (int y = 0; y < yCount; y++)
                  {
                      srcCalculator.SetY(srcRegionY + y);
                      dstCalculator.SetY(dstRegionY + y);

                      int srcOffset = srcCalculator.GetOffset(srcRegionX);
                      int dstOffset = dstCalculator.GetOffset(dstRegionX);

                      srcSpan.Slice(srcOffset - srcBaseOffset, xCount * srcBpp)
                          .CopyTo(dstSpan.Slice(dstOffset - dstBaseOffset, xCount * dstBpp));
                  }
              }
              else
              {
                  fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
                  {
                      byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                      byte* srcBase = srcPtr - srcBaseOffset;

                      for (int y = 0; y < yCount; y++)
                      {
                          srcCalculator.SetY(srcRegionY + y);
                          dstCalculator.SetY(dstRegionY + y);

                          for (int x = 0; x < xCount; x++)
                          {
                              int srcOffset = srcCalculator.GetOffset(srcRegionX + x);
                              int dstOffset = dstCalculator.GetOffset(dstRegionX + x);

                              *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                          }
                      }
                  }
              }

              return true;
          }

          // OPT: This allocates a (potentially) huge temporary array and then copies an existing
          // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
          // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
          Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();

          // The switch expression only dispatches on the pixel size; its result is discarded.
          // NOTE(review): the element type is selected from srcBpp alone - confirm this is intended when srcBpp != dstBpp.
          bool _ = srcBpp switch
          {
              1 => Convert<byte>(dstSpan, srcSpan),
              2 => Convert<ushort>(dstSpan, srcSpan),
              4 => Convert<uint>(dstSpan, srcSpan),
              8 => Convert<ulong>(dstSpan, srcSpan),
              12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
              16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
              _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
          };

          memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
      }
      else
      {
          if (remap &&
              _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
              _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
              _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
          {
              // Fast path for clears when remap is enabled.
              // NOTE(review): size * 4 is evaluated in 32-bit unsigned arithmetic - confirm it cannot overflow.
              memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
          }
          else
          {
              // TODO: Implement remap functionality.
              // Buffer to buffer copy.

              bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
              bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();

              // When only one side is of pitch kind, the GOB layout is converted while copying.
              if (!srcIsPitchKind && dstIsPitchKind)
              {
                  CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
              }
              else if (srcIsPitchKind && !dstIsPitchKind)
              {
                  CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
              }
              else
              {
                  memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
              }
          }
      }
  }
  /// <summary>
  /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
  /// </summary>
  /// <remarks>
  /// Data is moved 16 bytes at a time when both addresses and the size are multiples of 16, and byte by byte otherwise.
  /// </remarks>
  /// <param name="memoryManager">GPU memory manager</param>
  /// <param name="srcGpuVa">Source GPU virtual address</param>
  /// <param name="dstGpuVa">Destination GPU virtual address</param>
  /// <param name="size">Size in bytes of the copy</param>
  private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
  {
      bool isVectorAligned = ((srcGpuVa | dstGpuVa | size) & 0xf) == 0;

      if (!isVectorAligned)
      {
          for (ulong position = 0; position < size; position++)
          {
              // Only the source address is translated; the destination is written sequentially.
              ulong sourceVa = ConvertGobLinearToBlockLinearAddress(srcGpuVa + position);
              byte value = memoryManager.Read<byte>(sourceVa, true);

              memoryManager.Write(dstGpuVa + position, value);
          }

          return;
      }

      for (ulong position = 0; position < size; position += 16)
      {
          ulong sourceVa = ConvertGobLinearToBlockLinearAddress(srcGpuVa + position);
          Vector128<byte> chunk = memoryManager.Read<Vector128<byte>>(sourceVa, true);

          memoryManager.Write(dstGpuVa + position, chunk);
      }
  }
  /// <summary>
  /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
  /// </summary>
  /// <remarks>
  /// Data is moved 16 bytes at a time when both addresses and the size are multiples of 16, and byte by byte otherwise.
  /// </remarks>
  /// <param name="memoryManager">GPU memory manager</param>
  /// <param name="srcGpuVa">Source GPU virtual address</param>
  /// <param name="dstGpuVa">Destination GPU virtual address</param>
  /// <param name="size">Size in bytes of the copy</param>
  private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
  {
      bool isVectorAligned = ((srcGpuVa | dstGpuVa | size) & 0xf) == 0;

      if (!isVectorAligned)
      {
          for (ulong position = 0; position < size; position++)
          {
              // The source is read sequentially; only the destination address is translated.
              byte value = memoryManager.Read<byte>(srcGpuVa + position, true);
              ulong targetVa = ConvertGobLinearToBlockLinearAddress(dstGpuVa + position);

              memoryManager.Write(targetVa, value);
          }

          return;
      }

      for (ulong position = 0; position < size; position += 16)
      {
          Vector128<byte> chunk = memoryManager.Read<Vector128<byte>>(srcGpuVa + position, true);
          ulong targetVa = ConvertGobLinearToBlockLinearAddress(dstGpuVa + position);

          memoryManager.Write(targetVa, chunk);
      }
  }
  /// <summary>
  /// Calculates the GOB block linear address from a linear address.
  /// </summary>
  /// <remarks>
  /// Only bits 4 to 8 of the address are permuted; all other bits are returned unchanged.
  /// </remarks>
  /// <param name="address">Linear address</param>
  /// <returns>Block linear address</returns>
  private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
  {
      // y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
      ulong untouched = address & ~0x1f0UL;

      ulong x4 = (address & 0x10) << 1;      // bit 4 -> bit 5
      ulong x5 = (address & 0x20) << 3;      // bit 5 -> bit 8
      ulong y0 = (address & 0x40) >> 2;      // bit 6 -> bit 4
      ulong y2y1 = (address & 0x180) >> 1;   // bits 7-8 -> bits 6-7

      return untouched | y0 | x4 | y2y1 | x5;
  }
  /// <summary>
  /// Performs a buffer to buffer, or buffer to texture copy, then optionally releases a semaphore.
  /// </summary>
  /// <remarks>
  /// Registered in the constructor as the write callback of the LaunchDma register.
  /// </remarks>
  /// <param name="argument">Method call argument</param>
  private void LaunchDma(int argument)
  {
      // The copy is performed first; the semaphore release follows it.
      DmaCopy(argument);
      ReleaseSemaphore(argument);
  }
  407. }
  408. }