DmaClass.cs 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635
  1. using Ryujinx.Common;
  2. using Ryujinx.Graphics.Device;
  3. using Ryujinx.Graphics.Gpu.Engine.Threed;
  4. using Ryujinx.Graphics.Gpu.Memory;
  5. using Ryujinx.Graphics.Texture;
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Runtime.CompilerServices;
  9. using System.Runtime.InteropServices;
  10. using System.Runtime.Intrinsics;
  11. namespace Ryujinx.Graphics.Gpu.Engine.Dma
  12. {
  13. /// <summary>
  14. /// Represents a DMA copy engine class.
  15. /// </summary>
  16. class DmaClass : IDeviceState
  17. {
  /// <summary>
  /// GPU context; used here to obtain the timestamp written by four-word semaphore releases.
  /// </summary>
  private readonly GpuContext _context;

  /// <summary>
  /// GPU channel; provides the memory manager that all copies and semaphore writes go through.
  /// </summary>
  private readonly GpuChannel _channel;

  /// <summary>
  /// 3D engine; its pending syncs and dirty uniform buffer data are processed before each copy.
  /// </summary>
  private readonly ThreedClass _3dEngine;

  /// <summary>
  /// Register state of the DMA class, including the LaunchDma write callback.
  /// </summary>
  private readonly DeviceState<DmaClassState> _state;
  /// <summary>
  /// Bit flags decoded from the LaunchDma method argument.
  /// </summary>
  [Flags]
  private enum CopyFlags
  {
      /// <summary>Bit 7: the source uses a linear layout.</summary>
      SrcLinear = 0x080,

      /// <summary>Bit 8: the destination uses a linear layout.</summary>
      DstLinear = 0x100,

      /// <summary>Bit 9: the copy spans multiple lines (2D copy).</summary>
      MultiLineEnable = 0x200,

      /// <summary>Bit 10: component remapping is enabled.</summary>
      RemapEnable = 0x400
  }
  /// <summary>
  /// Immutable description of one side (source or destination) of a 2D copy.
  /// </summary>
  private struct TextureParams
  {
      /// <summary>
      /// X coordinate where the copied region starts.
      /// </summary>
      public readonly int RegionX;

      /// <summary>
      /// Y coordinate where the copied region starts.
      /// </summary>
      public readonly int RegionY;

      /// <summary>
      /// Offset of the mapped data from the base pointer in memory.
      /// </summary>
      public readonly int BaseOffset;

      /// <summary>
      /// Size of one pixel, in bytes.
      /// </summary>
      public readonly int Bpp;

      /// <summary>
      /// True for a linear texture, false for a block linear one.
      /// </summary>
      public readonly bool Linear;

      /// <summary>
      /// Calculator that turns pixel coordinates into byte offsets.
      /// </summary>
      public readonly OffsetCalculator Calculator;

      /// <summary>
      /// Initializes every field of the texture parameters.
      /// </summary>
      /// <param name="regionX">X coordinate where the copied region starts</param>
      /// <param name="regionY">Y coordinate where the copied region starts</param>
      /// <param name="baseOffset">Offset of the mapped data from the base pointer in memory</param>
      /// <param name="bpp">Size of one pixel, in bytes</param>
      /// <param name="linear">True for a linear texture, false for a block linear one</param>
      /// <param name="calculator">Calculator that turns pixel coordinates into byte offsets</param>
      public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
      {
          (RegionX, RegionY) = (regionX, regionY);
          (BaseOffset, Bpp) = (baseOffset, bpp);
          (Linear, Calculator) = (linear, calculator);
      }
  }
  /// <summary>
  /// Three-byte value with no padding, used as the element type when copying 3-byte components.
  /// </summary>
  [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 3)]
  private struct UInt24
  {
      /// <summary>First byte in memory order.</summary>
      public byte Byte0;

      /// <summary>Second byte in memory order.</summary>
      public byte Byte1;

      /// <summary>Third byte in memory order.</summary>
      public byte Byte2;
  }
  /// <summary>
  /// Builds the DMA copy engine class and hooks the LaunchDma register to its handler.
  /// </summary>
  /// <param name="context">GPU context</param>
  /// <param name="channel">GPU channel</param>
  /// <param name="threedEngine">3D engine</param>
  public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
  {
      _context = context;
      _channel = channel;
      _3dEngine = threedEngine;

      // Only LaunchDma has a callback (passed as the first RwCallback argument); the second argument is null.
      var registerCallbacks = new Dictionary<string, RwCallback>
      {
          [nameof(DmaClassState.LaunchDma)] = new RwCallback(LaunchDma, null)
      };

      _state = new DeviceState<DmaClassState>(registerCallbacks);
  }
  /// <summary>
  /// Reads a value from the class register state.
  /// </summary>
  /// <param name="offset">Register byte offset</param>
  /// <returns>Data stored at the given offset</returns>
  public int Read(int offset)
  {
      return _state.Read(offset);
  }
  /// <summary>
  /// Writes a value to the class register state.
  /// </summary>
  /// <param name="offset">Register byte offset</param>
  /// <param name="data">Data to be written</param>
  public void Write(int offset, int data)
  {
      _state.Write(offset, data);
  }
  /// <summary>
  /// Checks whether the copied region spans the whole texture, so that the copy can be
  /// treated as a full-texture transfer.
  /// </summary>
  /// <param name="tex">Texture to compare</param>
  /// <param name="linear">True if the texture is linear, false if block linear</param>
  /// <param name="bpp">Texture bytes per pixel</param>
  /// <param name="stride">Texture stride</param>
  /// <param name="xCount">Number of pixels to be copied</param>
  /// <param name="yCount">Number of lines to be copied</param>
  /// <returns>True if the region covers the entire texture, false otherwise</returns>
  private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
  {
      if (!linear)
      {
          // Block linear: the region must start at the origin and match the texture size,
          // with the width compared after aligning the pixel count.
          int gobWidthInPixels = Constants.GobAlignment / bpp;
          bool startsAtOrigin = tex.RegionX == 0 && tex.RegionY == 0;

          return startsAtOrigin &&
                 tex.Width == BitUtils.AlignUp(xCount, gobWidthInPixels) &&
                 tex.Height == yCount;
      }

      // A non-positive stride is never considered complete; a negative stride means the
      // texture has to be flipped, which the fast copy does not handle.
      if (stride <= 0)
      {
          return false;
      }

      // Linear: the stride, in pixels, must equal the aligned line width.
      int strideWidthInPixels = Constants.StrideAlignment / bpp;
      int alignedLineWidth = BitUtils.AlignUp(xCount, strideWidthInPixels);

      return stride / bpp == alignedLineWidth;
  }
  /// <summary>
  /// Writes the semaphore requested by a LaunchDma call, if any.
  /// </summary>
  /// <param name="argument">The LaunchDma call argument; bits 3-4 select the semaphore type</param>
  private void ReleaseSemaphore(int argument)
  {
      var semaphoreType = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3);

      if (semaphoreType == LaunchDmaSemaphoreType.None)
      {
          return;
      }

      ulong semaphoreAddress = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;

      if (semaphoreType == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
      {
          // One word: only the payload is written.
          _channel.MemoryManager.Write(semaphoreAddress, _state.State.SetSemaphorePayload);

          return;
      }

      // Every other type is handled as a four word release: the timestamp goes to
      // offset 8 first, then the payload (widened to 64 bits) to offset 0.
      _channel.MemoryManager.Write(semaphoreAddress + 8, _context.GetTimestamp());
      _channel.MemoryManager.Write(semaphoreAddress, (ulong)_state.State.SetSemaphorePayload);
  }
  /// <summary>
  /// Performs the copy described by the current register state: either a 2D (multi-line) copy with
  /// optional layout conversion and component remapping, or a one-dimensional buffer copy or clear.
  /// </summary>
  /// <param name="argument">The LaunchDma call argument; its bits are decoded as <see cref="CopyFlags"/></param>
  /// <exception cref="NotSupportedException">
  /// Thrown by the 2D path when the pixel size (identity remap) or the component size (shuffling remap)
  /// has no matching copy routine.
  /// </exception>
  private void DmaCopy(int argument)
  {
      var memoryManager = _channel.MemoryManager;

      CopyFlags copyFlags = (CopyFlags)argument;

      bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
      bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
      bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
      bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);

      uint size = _state.State.LineLengthIn;

      // Nothing to copy for an empty line.
      if (size == 0)
      {
          return;
      }

      ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
      ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;

      int xCount = (int)size;
      int yCount = (int)_state.State.LineCount;

      // Let the 3D engine process its pending syncs and dirty uniform buffer data before memory is touched.
      _3dEngine.CreatePendingSyncs();
      _3dEngine.FlushUboDirty();

      if (copy2D)
      {
          // Multi-line (2D) copy.
          int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
          int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
          int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;

          // Without remapping, data is handled as single bytes.
          int srcBpp = remap ? srcComponents * componentSize : 1;
          int dstBpp = remap ? dstComponents * componentSize : 1;

          var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
          var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);

          int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;

          // The region origin is only taken from the texture state for block linear layouts.
          if (!srcLinear)
          {
              srcRegionX = src.RegionX;
              srcRegionY = src.RegionY;
          }

          if (!dstLinear)
          {
              dstRegionX = dst.RegionX;
              dstRegionY = dst.RegionY;
          }

          int srcStride = (int)_state.State.PitchIn;
          int dstStride = (int)_state.State.PitchOut;

          var srcCalculator = new OffsetCalculator(
              src.Width,
              src.Height,
              srcStride,
              srcLinear,
              src.MemoryLayout.UnpackGobBlocksInY(),
              src.MemoryLayout.UnpackGobBlocksInZ(),
              srcBpp);

          var dstCalculator = new OffsetCalculator(
              dst.Width,
              dst.Height,
              dstStride,
              dstLinear,
              dst.MemoryLayout.UnpackGobBlocksInY(),
              dst.MemoryLayout.UnpackGobBlocksInZ(),
              dstBpp);

          (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
          (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);

          // With a negative stride the last line has the lowest address, so move the base back to it.
          if (srcLinear && srcStride < 0)
          {
              srcBaseOffset += srcStride * (yCount - 1);
          }

          if (dstLinear && dstStride < 0)
          {
              dstBaseOffset += dstStride * (yCount - 1);
          }

          ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);

          bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
          bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);

          if (completeSource && completeDest)
          {
              // Both sides cover their whole texture: try to update a cached texture directly.
              var target = memoryManager.Physical.TextureCache.FindTexture(
                  memoryManager,
                  dstGpuVa,
                  dstBpp,
                  dstStride,
                  dst.Height,
                  xCount,
                  yCount,
                  dstLinear,
                  dst.MemoryLayout.UnpackGobBlocksInY(),
                  dst.MemoryLayout.UnpackGobBlocksInZ());

              if (target != null)
              {
                  byte[] data;

                  if (srcLinear)
                  {
                      data = LayoutConverter.ConvertLinearStridedToLinear(
                          target.Info.Width,
                          target.Info.Height,
                          1,
                          1,
                          xCount * srcBpp,
                          srcStride,
                          target.Info.FormatInfo.BytesPerPixel,
                          srcSpan);
                  }
                  else
                  {
                      data = LayoutConverter.ConvertBlockLinearToLinear(
                          src.Width,
                          src.Height,
                          src.Depth,
                          1,
                          1,
                          1,
                          1,
                          1,
                          srcBpp,
                          src.MemoryLayout.UnpackGobBlocksInY(),
                          src.MemoryLayout.UnpackGobBlocksInZ(),
                          1,
                          new SizeInfo((int)target.Size),
                          srcSpan);
                  }

                  target.SynchronizeMemory();
                  target.SetData(data);
                  target.SignalModified();

                  return;
              }
              else if (srcCalculator.LayoutMatches(dstCalculator))
              {
                  // No layout conversion has to be performed, just copy the data entirely.
                  memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);

                  return;
              }
          }

          // OPT: This allocates a (potentially) huge temporary array and then copies an existing
          // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
          // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
          Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();

          TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
          TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);

          // If remapping is disabled, we always copy the components directly, in order.
          // If it's enabled, but the mapping is just XYZW, we also copy them in order.
          bool isIdentityRemap = !remap ||
              (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
              (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
              (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
              (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));

          if (isIdentityRemap)
          {
              // The order of the components doesn't change, so we can just copy directly
              // (with layout conversion if necessary).
              switch (srcBpp)
              {
                  case 1: Copy<byte>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 2: Copy<ushort>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 4: Copy<uint>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 8: Copy<ulong>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 12: Copy<Bpp12Pixel>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 16: Copy<Vector128<byte>>(dstSpan, srcSpan, dstParams, srcParams); break;
                  default: throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.");
              }
          }
          else
          {
              // The order or value of the components might change.
              switch (componentSize)
              {
                  case 1: CopyShuffle<byte>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 2: CopyShuffle<ushort>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 3: CopyShuffle<UInt24>(dstSpan, srcSpan, dstParams, srcParams); break;
                  case 4: CopyShuffle<uint>(dstSpan, srcSpan, dstParams, srcParams); break;
                  default: throw new NotSupportedException($"Unable to copy {componentSize} component size.");
              }
          }

          // Write the assembled temporary copy back to GPU memory.
          memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
      }
      else
      {
          if (remap &&
              _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
              _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
              _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
              _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
          {
              // Fast path for clears when remap is enabled.
              // The line length counts 4-byte elements here, hence the multiplication.
              memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
          }
          else
          {
              // TODO: Implement remap functionality.
              // Buffer to buffer copy.

              bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
              bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();

              // When only one side is of a pitch kind, the GOB address swizzle is applied to the other side.
              if (!srcIsPitchKind && dstIsPitchKind)
              {
                  CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
              }
              else if (srcIsPitchKind && !dstIsPitchKind)
              {
                  CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
              }
              else
              {
                  memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
              }
          }
      }
  }
  /// <summary>
  /// Copies data from one texture to another, while performing layout conversion if necessary.
  /// </summary>
  /// <remarks>
  /// One value of type <typeparamref name="T"/> is transferred per pixel. <typeparamref name="T"/> may be
  /// smaller than the pixel when this is used to copy a single component (the spans are then pre-offset
  /// by the caller); in that case whole rows can't be block-copied and the per-pixel path is always used.
  /// </remarks>
  /// <typeparam name="T">Type of the value copied for each pixel</typeparam>
  /// <param name="dstSpan">Destination texture memory region</param>
  /// <param name="srcSpan">Source texture memory region</param>
  /// <param name="dst">Destination texture parameters</param>
  /// <param name="src">Source texture parameters</param>
  private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
  {
      int xCount = (int)_state.State.LineLengthIn;
      int yCount = (int)_state.State.LineCount;

      // The row copy moves xCount * Bpp bytes, which only matches the per-pixel semantics
      // when T spans the entire pixel. Component-sized copies must not take this path, or they
      // would overwrite whole pixels and slice past the end of the (offset) spans.
      bool canCopyWholeRows = src.Linear &&
                              dst.Linear &&
                              src.Bpp == dst.Bpp &&
                              Unsafe.SizeOf<T>() == src.Bpp;

      if (canCopyWholeRows)
      {
          // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
          // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
          for (int y = 0; y < yCount; y++)
          {
              src.Calculator.SetY(src.RegionY + y);
              dst.Calculator.SetY(dst.RegionY + y);

              int srcOffset = src.Calculator.GetOffset(src.RegionX);
              int dstOffset = dst.Calculator.GetOffset(dst.RegionX);

              srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp)
                  .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
          }
      }
      else
      {
          fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
          {
              // Layout offsets are relative to the base, so we need to subtract the span's offset.
              byte* dstBase = dstPtr - dst.BaseOffset;
              byte* srcBase = srcPtr - src.BaseOffset;

              for (int y = 0; y < yCount; y++)
              {
                  src.Calculator.SetY(src.RegionY + y);
                  dst.Calculator.SetY(dst.RegionY + y);

                  for (int x = 0; x < xCount; x++)
                  {
                      int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
                      int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);

                      *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                  }
              }
          }
      }
  }
  /// <summary>
  /// Writes a constant value to every pixel of the copy region, while performing layout conversion if necessary.
  /// </summary>
  /// <typeparam name="T">Type of the value written for each pixel</typeparam>
  /// <param name="dstSpan">Destination texture memory region</param>
  /// <param name="dst">Destination texture parameters</param>
  /// <param name="fillValue">Constant value to be written</param>
  private unsafe void Fill<T>(Span<byte> dstSpan, TextureParams dst, T fillValue) where T : unmanaged
  {
      int width = (int)_state.State.LineLengthIn;
      int height = (int)_state.State.LineCount;

      fixed (byte* spanStart = dstSpan)
      {
          // Offsets produced by the calculator are relative to the base, while the span
          // starts at BaseOffset, so rebase the pointer once up front.
          byte* layoutOrigin = spanStart - dst.BaseOffset;

          for (int row = 0; row < height; row++)
          {
              dst.Calculator.SetY(dst.RegionY + row);

              for (int column = 0; column < width; column++)
              {
                  T* pixel = (T*)(layoutOrigin + dst.Calculator.GetOffset(dst.RegionX + column));

                  *pixel = fillValue;
              }
          }
      }
  }
  /// <summary>
  /// Copies data from one texture to another one component at a time, so that each destination
  /// component can come from any source component or from one of the two remap constants.
  /// </summary>
  /// <typeparam name="T">Component type</typeparam>
  /// <param name="dstSpan">Destination texture memory region</param>
  /// <param name="srcSpan">Source texture memory region</param>
  /// <param name="dst">Destination texture parameters</param>
  /// <param name="src">Source texture parameters</param>
  private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
  {
      int componentBytes = Unsafe.SizeOf<T>();
      int componentCount = (int)_state.State.SetRemapComponentsNumDstComponents + 1;

      for (int component = 0; component < componentCount; component++)
      {
          SetRemapComponentsDst selector;

          switch (component)
          {
              case 0: selector = _state.State.SetRemapComponentsDstX; break;
              case 1: selector = _state.State.SetRemapComponentsDstY; break;
              case 2: selector = _state.State.SetRemapComponentsDstZ; break;
              default: selector = _state.State.SetRemapComponentsDstW; break;
          }

          // Index of the source component feeding this destination component, or -1 if it is not a copy.
          int sourceComponent = selector switch
          {
              SetRemapComponentsDst.SrcX => 0,
              SetRemapComponentsDst.SrcY => 1,
              SetRemapComponentsDst.SrcZ => 2,
              SetRemapComponentsDst.SrcW => 3,
              _ => -1
          };

          int dstByteOffset = componentBytes * component;

          if (sourceComponent >= 0)
          {
              // Offsetting both spans makes the per-pixel copy land on the chosen components.
              Copy<T>(dstSpan.Slice(dstByteOffset), srcSpan.Slice(componentBytes * sourceComponent), dst, src);
          }
          else if (selector == SetRemapComponentsDst.ConstA)
          {
              Fill<T>(dstSpan.Slice(dstByteOffset), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA));
          }
          else if (selector == SetRemapComponentsDst.ConstB)
          {
              Fill<T>(dstSpan.Slice(dstByteOffset), dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB));
          }

          // Any other selector leaves the destination component untouched.
      }
  }
  /// <summary>
  /// Copies a range of memory, reading the source through the GOB address conversion
  /// and writing the destination at sequential addresses.
  /// </summary>
  /// <param name="memoryManager">GPU memory manager</param>
  /// <param name="srcGpuVa">Source GPU virtual address</param>
  /// <param name="dstGpuVa">Destination GPU virtual address</param>
  /// <param name="size">Size in bytes of the copy</param>
  private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
  {
      // 16-byte chunks can only be used when both addresses and the size are 16-byte aligned.
      bool isVectorAligned = ((srcGpuVa | dstGpuVa | size) & 0xf) == 0;

      if (!isVectorAligned)
      {
          for (ulong position = 0; position < size; position++)
          {
              ulong swizzledSrc = ConvertGobLinearToBlockLinearAddress(srcGpuVa + position);
              byte value = memoryManager.Read<byte>(swizzledSrc, true);

              memoryManager.Write(dstGpuVa + position, value);
          }

          return;
      }

      for (ulong position = 0; position < size; position += 16)
      {
          ulong swizzledSrc = ConvertGobLinearToBlockLinearAddress(srcGpuVa + position);
          Vector128<byte> chunk = memoryManager.Read<Vector128<byte>>(swizzledSrc, true);

          memoryManager.Write(dstGpuVa + position, chunk);
      }
  }
  /// <summary>
  /// Copies a range of memory, reading the source at sequential addresses
  /// and writing the destination through the GOB address conversion.
  /// </summary>
  /// <param name="memoryManager">GPU memory manager</param>
  /// <param name="srcGpuVa">Source GPU virtual address</param>
  /// <param name="dstGpuVa">Destination GPU virtual address</param>
  /// <param name="size">Size in bytes of the copy</param>
  private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
  {
      // 16-byte chunks can only be used when both addresses and the size are 16-byte aligned.
      bool isVectorAligned = ((srcGpuVa | dstGpuVa | size) & 0xf) == 0;

      if (!isVectorAligned)
      {
          for (ulong position = 0; position < size; position++)
          {
              byte value = memoryManager.Read<byte>(srcGpuVa + position, true);
              ulong swizzledDst = ConvertGobLinearToBlockLinearAddress(dstGpuVa + position);

              memoryManager.Write(swizzledDst, value);
          }

          return;
      }

      for (ulong position = 0; position < size; position += 16)
      {
          Vector128<byte> chunk = memoryManager.Read<Vector128<byte>>(srcGpuVa + position, true);
          ulong swizzledDst = ConvertGobLinearToBlockLinearAddress(dstGpuVa + position);

          memoryManager.Write(swizzledDst, chunk);
      }
  }
  /// <summary>
  /// Converts a linear address into the matching GOB block linear address by permuting address bits 4 to 8.
  /// </summary>
  /// <param name="address">Linear address</param>
  /// <returns>Block linear address</returns>
  private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
  {
      // Bits 8..0 of the address are laid out as:
      // y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
      ulong untouched = address & ~0x1f0UL;   // Everything except bits 4-8 stays in place.
      ulong y0 = (address & 0x40) >> 2;       // Bit 6 -> bit 4.
      ulong x4 = (address & 0x10) << 1;       // Bit 4 -> bit 5.
      ulong y2y1 = (address & 0x180) >> 1;    // Bits 8-7 -> bits 7-6.
      ulong x5 = (address & 0x20) << 3;       // Bit 5 -> bit 8.

      return untouched | y0 | x4 | y2y1 | x5;
  }
  /// <summary>
  /// Handles a write to the LaunchDma register: runs the copy first, then releases
  /// the semaphore selected by the same argument (if any).
  /// </summary>
  /// <param name="argument">Method call argument</param>
  private void LaunchDma(int argument)
  {
      // The copy must be done before the semaphore is written.
      DmaCopy(argument);

      ReleaseSemaphore(argument);
  }
  568. }
  569. }